diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f67be53643..a05fcb1241c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# openpipelines x.x.x + +## MINOR CHANGES + +* Several components (cleanup): remove the workaround that was needed to use shared utility functions with Nextflow Fusion (PR #920). + # openpipelines 2.0.0-rc.2 ## BUG FIXES diff --git a/src/annotate/celltypist/script.py b/src/annotate/celltypist/script.py index 5ef096af8d6..211ec55ad56 100644 --- a/src/annotate/celltypist/script.py +++ b/src/annotate/celltypist/script.py @@ -27,18 +27,8 @@ meta = { } ## VIASH END - -# START TEMPORARY WORKAROUND setup_logger -def setup_logger(): - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(sys.stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger def check_celltypist_format(indata): if np.abs(np.expm1(indata[0]).sum()-10000) > 1: diff --git a/src/annotate/onclass/script.py b/src/annotate/onclass/script.py index ca857fe43ff..f740e786bb1 100644 --- a/src/annotate/onclass/script.py +++ b/src/annotate/onclass/script.py @@ -31,17 +31,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -def setup_logger(): - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(sys.stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/annotate/popv/script.py b/src/annotate/popv/script.py index 51fcf7974fa..d3d8b5e0ad6 100644 --- 
a/src/annotate/popv/script.py +++ b/src/annotate/popv/script.py @@ -52,22 +52,7 @@ def mps_is_available(): ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() use_gpu = cuda_is_available() diff --git a/src/annotate/random_forest_annotation/script.py b/src/annotate/random_forest_annotation/script.py index 545e5292efb..d1d73a10f12 100644 --- a/src/annotate/random_forest_annotation/script.py +++ b/src/annotate/random_forest_annotation/script.py @@ -30,17 +30,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -def setup_logger(): - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(sys.stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/annotate/scanvi/script.py b/src/annotate/scanvi/script.py index eb7344d771d..eadfe0a9f14 100644 --- a/src/annotate/scanvi/script.py +++ b/src/annotate/scanvi/script.py @@ -40,22 +40,7 @@ sys.path.append(meta["resources_dir"]) from query_reference_allignment import set_var_index, cross_check_genes -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using 
Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() if (not par["scvi_reference_model"]) and not (par["scanvi_reference_model"]) or (par["scvi_reference_model"] and par["scanvi_reference_model"]): diff --git a/src/annotate/svm_annotation/script.py b/src/annotate/svm_annotation/script.py index dd1dd2a9e55..99048a2a459 100644 --- a/src/annotate/svm_annotation/script.py +++ b/src/annotate/svm_annotation/script.py @@ -32,17 +32,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -def setup_logger(): - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(sys.stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/cluster/leiden/config.vsh.yaml b/src/cluster/leiden/config.vsh.yaml index cc2309f388f..a5fcba8beae 100644 --- a/src/cluster/leiden/config.vsh.yaml +++ b/src/cluster/leiden/config.vsh.yaml @@ -71,6 +71,7 @@ resources: - type: python_script path: script.py - path: /src/utils/setup_logger.py + - path: /src/utils/compress_h5mu.py test_resources: - type: python_script path: test.py diff --git a/src/cluster/leiden/script.py b/src/cluster/leiden/script.py index e085912d2a3..90e35f7a5a1 100644 --- a/src/cluster/leiden/script.py +++ b/src/cluster/leiden/script.py @@ 
-37,57 +37,7 @@ sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion -# from compress_h5mu import compress_h5mu -from h5py import File as H5File -from h5py import Group, Dataset -from typing import Union, Literal -from functools import partial - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = 
starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu +from compress_h5mu import compress_h5mu _shared_logger_name = "leiden" diff --git a/src/compression/compress_h5mu/script.py b/src/compression/compress_h5mu/script.py index 196082ad2c0..99b9840ff87 100644 --- a/src/compression/compress_h5mu/script.py +++ b/src/compression/compress_h5mu/script.py @@ -9,59 +9,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion -# from compress_h5mu import compress_h5mu -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - 
with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu +from compress_h5mu import compress_h5mu if __name__ == "__main__": compress_h5mu(par["input"], par["output"], compression=par["compression"]) \ No newline at end of file diff --git a/src/convert/from_10xh5_to_h5mu/script.py b/src/convert/from_10xh5_to_h5mu/script.py index ecd8e4517d6..83e9a174c24 100755 --- a/src/convert/from_10xh5_to_h5mu/script.py +++ b/src/convert/from_10xh5_to_h5mu/script.py @@ -16,22 +16,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading %s.", par["input"]) diff --git a/src/convert/from_10xmtx_to_h5mu/script.py 
b/src/convert/from_10xmtx_to_h5mu/script.py index c6e855416e8..6677852ce83 100755 --- a/src/convert/from_10xmtx_to_h5mu/script.py +++ b/src/convert/from_10xmtx_to_h5mu/script.py @@ -10,22 +10,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading %s.", par["input"]) diff --git a/src/convert/from_cellranger_multi_to_h5mu/script.py b/src/convert/from_cellranger_multi_to_h5mu/script.py index 77047d1d6e6..71c2a76fddc 100644 --- a/src/convert/from_cellranger_multi_to_h5mu/script.py +++ b/src/convert/from_cellranger_multi_to_h5mu/script.py @@ -25,22 +25,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() POSSIBLE_LIBRARY_TYPES = ('vdj_t', 'vdj_b', 'vdj_t_gd', 'count', 'antigen_analysis', 'multiplexing_analysis') diff --git 
a/src/convert/from_h5ad_to_h5mu/script.py b/src/convert/from_h5ad_to_h5mu/script.py index 8a1d4884f1d..8ed51c78ab5 100755 --- a/src/convert/from_h5ad_to_h5mu/script.py +++ b/src/convert/from_h5ad_to_h5mu/script.py @@ -13,22 +13,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() assert len(par["input"]) == len(par["modality"]), "Number of input files should be the same length as the number of modalities" diff --git a/src/convert/from_h5mu_to_h5ad/script.py b/src/convert/from_h5mu_to_h5ad/script.py index f784220ba97..968a9ec38d0 100755 --- a/src/convert/from_h5mu_to_h5ad/script.py +++ b/src/convert/from_h5mu_to_h5ad/script.py @@ -11,22 +11,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() # TODO: Merge modalities into one layer diff --git 
a/src/correction/cellbender_remove_background/script.py b/src/correction/cellbender_remove_background/script.py index de169ddab10..a0ebd6ce97f 100644 --- a/src/correction/cellbender_remove_background/script.py +++ b/src/correction/cellbender_remove_background/script.py @@ -62,22 +62,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/correction/cellbender_remove_background_v0_2/script.py b/src/correction/cellbender_remove_background_v0_2/script.py index 67823b342ac..4b23074af65 100644 --- a/src/correction/cellbender_remove_background_v0_2/script.py +++ b/src/correction/cellbender_remove_background_v0_2/script.py @@ -53,22 +53,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() from helper import anndata_from_h5 diff --git 
a/src/dataflow/concatenate_h5mu/config.vsh.yaml b/src/dataflow/concatenate_h5mu/config.vsh.yaml index 7837fa54db7..689dae2a458 100644 --- a/src/dataflow/concatenate_h5mu/config.vsh.yaml +++ b/src/dataflow/concatenate_h5mu/config.vsh.yaml @@ -66,7 +66,7 @@ resources: - type: python_script path: script.py - path: /src/utils/setup_logger.py - # - path: /src/utils/compress_h5mu.py + - path: /src/utils/compress_h5mu.py test_resources: - type: python_script path: test.py diff --git a/src/dataflow/concatenate_h5mu/script.py b/src/dataflow/concatenate_h5mu/script.py index 15511375fc0..c74d2681122 100644 --- a/src/dataflow/concatenate_h5mu/script.py +++ b/src/dataflow/concatenate_h5mu/script.py @@ -28,74 +28,9 @@ ### VIASH END sys.path.append(meta["resources_dir"]) +from compress_h5mu import compress_h5mu +from setup_logger import setup_logger -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion - -# from compress_h5mu import compress_h5mu -from h5py import Group, Dataset -from typing import Union -from functools import partial - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - 
else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - -# START TEMPORARY WORKAROUND setup_logger -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger logger = setup_logger() def nunique(row): diff --git a/src/dataflow/merge/script.py b/src/dataflow/merge/script.py index 73bf8d1ce78..454ae8be4a4 100644 --- a/src/dataflow/merge/script.py +++ b/src/dataflow/merge/script.py @@ -15,22 +15,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = 
logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() def main(): diff --git a/src/dataflow/split_h5mu/config.vsh.yaml b/src/dataflow/split_h5mu/config.vsh.yaml index 14177849c20..c085dc71281 100644 --- a/src/dataflow/split_h5mu/config.vsh.yaml +++ b/src/dataflow/split_h5mu/config.vsh.yaml @@ -52,6 +52,7 @@ argument_groups: resources: - type: python_script path: script.py + - path: /src/utils/setup_logger.py test_resources: - type: python_script path: test.py diff --git a/src/dataflow/split_h5mu/script.py b/src/dataflow/split_h5mu/script.py index 04f3a97ba2b..1a42ee0a544 100644 --- a/src/dataflow/split_h5mu/script.py +++ b/src/dataflow/split_h5mu/script.py @@ -1,3 +1,4 @@ +import sys import mudata as mu import pandas as pd import re @@ -27,22 +28,8 @@ par["obs_feature"] = "Obs" ### VIASH END -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/dataflow/split_h5mu_train_test/script.py b/src/dataflow/split_h5mu_train_test/script.py index f9ab708b1e3..a0d9ce17dad 100644 --- a/src/dataflow/split_h5mu_train_test/script.py +++ 
b/src/dataflow/split_h5mu_train_test/script.py @@ -19,20 +19,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(sys.stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/dataflow/split_modalities/script.py b/src/dataflow/split_modalities/script.py index 7c19b49091f..9ca3d93ecd9 100644 --- a/src/dataflow/split_modalities/script.py +++ b/src/dataflow/split_modalities/script.py @@ -18,22 +18,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() def main() -> None: diff --git a/src/dimred/densmap/script.py b/src/dimred/densmap/script.py index a9e4c9af09a..64543a8ad46 100644 --- a/src/dimred/densmap/script.py +++ b/src/dimred/densmap/script.py @@ -16,22 +16,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger 
import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading %s", par["input"]) diff --git a/src/dimred/lsi/script.py b/src/dimred/lsi/script.py index 8a1f5328823..600278100b4 100644 --- a/src/dimred/lsi/script.py +++ b/src/dimred/lsi/script.py @@ -27,22 +27,7 @@ from subset_vars import subset_vars -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/dimred/pca/script.py b/src/dimred/pca/script.py index 42b98ff723f..6d994a37f04 100644 --- a/src/dimred/pca/script.py +++ b/src/dimred/pca/script.py @@ -20,22 +20,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - 
console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading %s.", par["input"]) diff --git a/src/dimred/tsne/script.py b/src/dimred/tsne/script.py index 4ed49246031..637e59e23ba 100644 --- a/src/dimred/tsne/script.py +++ b/src/dimred/tsne/script.py @@ -22,22 +22,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading %s", par["input"]) diff --git a/src/dimred/umap/script.py b/src/dimred/umap/script.py index d56277dc596..897cf30963d 100644 --- a/src/dimred/umap/script.py +++ b/src/dimred/umap/script.py @@ -22,22 +22,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() 
logger.info("Reading %s", par["input"]) diff --git a/src/feature_annotation/highly_variable_features_scanpy/script.py b/src/feature_annotation/highly_variable_features_scanpy/script.py index ac27cdb5fa9..ac6b97503d6 100644 --- a/src/feature_annotation/highly_variable_features_scanpy/script.py +++ b/src/feature_annotation/highly_variable_features_scanpy/script.py @@ -43,22 +43,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() mdata = mu.read_h5mu(par["input"]) diff --git a/src/feature_annotation/highly_variable_features_scanpy/test.py b/src/feature_annotation/highly_variable_features_scanpy/test.py index 90d41732654..4f1f111e78c 100644 --- a/src/feature_annotation/highly_variable_features_scanpy/test.py +++ b/src/feature_annotation/highly_variable_features_scanpy/test.py @@ -17,22 +17,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY 
WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() @pytest.fixture diff --git a/src/feature_annotation/score_genes_cell_cycle_scanpy/script.py b/src/feature_annotation/score_genes_cell_cycle_scanpy/script.py index 3381d319d3d..b1b87664e71 100644 --- a/src/feature_annotation/score_genes_cell_cycle_scanpy/script.py +++ b/src/feature_annotation/score_genes_cell_cycle_scanpy/script.py @@ -32,52 +32,7 @@ # import helper functions sys.path.append(meta["resources_dir"]) - -# START TEMPORARY WORKAROUND read_gene_list -# reason: resources aren't available when using Nextflow fusion - -# from helper import read_gene_list -from typing import List, Dict, Any, Optional - -def read_gene_list( - par: Dict[str, Any], - gene_names: List[str], - list_key: str, - file_key: str, - required: bool = True) -> Optional[List[str]]: - """ - Reads a gene list from the parameters and returns it as a list of strings. - """ - - # check whether one or the other was provided, if required - if required and not par[list_key] and not par[file_key]: - raise ValueError(f"Either --{list_key} or --{file_key} must be set") - - # read gene list from parameters - list_of_genes = par[list_key] if par[list_key] else [] - - # read gene list from file - if par[file_key]: - with open(par[file_key]) as file: - file_genes = [x.strip() for x in file] - list_of_genes.extend(file_genes) - - # check for missing genes - if not par["allow_missing_genes"] and list_of_genes: - missing = set(list_of_genes).difference(gene_names) - if missing: - raise ValueError(f"The follow genes are missing from the input dataset: {missing}") - - # return gene list - if list_of_genes: - return list_of_genes - elif required: - raise ValueError(f"No genes detected in --{list_key} or --{file_key}") - else: - return None - - -# END TEMPORARY WORKAROUND read_gene_list +from helper import read_gene_list # read data mdata = mu.read(par["input"]) diff --git a/src/feature_annotation/score_genes_scanpy/script.py 
b/src/feature_annotation/score_genes_scanpy/script.py index 1475e31f1c2..8f87c457ccd 100644 --- a/src/feature_annotation/score_genes_scanpy/script.py +++ b/src/feature_annotation/score_genes_scanpy/script.py @@ -28,51 +28,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) - -# START TEMPORARY WORKAROUND read_gene_list -# reason: resources aren't available when using Nextflow fusion - -# from helper import read_gene_list -from typing import List, Dict, Any, Optional - -def read_gene_list( - par: Dict[str, Any], - gene_names: List[str], - list_key: str, - file_key: str, - required: bool = True) -> Optional[List[str]]: - """ - Reads a gene list from the parameters and returns it as a list of strings. - """ - - # check whether one or the other was provided, if required - if required and not par[list_key] and not par[file_key]: - raise ValueError(f"Either --{list_key} or --{file_key} must be set") - - # read gene list from parameters - list_of_genes = par[list_key] if par[list_key] else [] - - # read gene list from file - if par[file_key]: - with open(par[file_key]) as file: - file_genes = [x.strip() for x in file] - list_of_genes.extend(file_genes) - - # check for missing genes - if not par["allow_missing_genes"] and list_of_genes: - missing = set(list(list_of_genes)).difference(gene_names) - if missing: - raise ValueError(f"The follow genes are missing from the input dataset: {missing}") - - # return gene list - if list_of_genes: - return list_of_genes - elif required: - raise ValueError(f"No genes detected in --{list_key} or --{file_key}") - else: - return None - -# END TEMPORARY WORKAROUND read_gene_list +from helper import read_gene_list # read data mdata = mu.read(par["input"]) diff --git a/src/filter/delimit_fraction/script.py b/src/filter/delimit_fraction/script.py index 3ae4b7ef270..514f7d36509 100644 --- a/src/filter/delimit_fraction/script.py +++ b/src/filter/delimit_fraction/script.py @@ -19,22 +19,7 @@ ### VIASH END 
sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading input data") diff --git a/src/filter/delimit_fraction/test.py b/src/filter/delimit_fraction/test.py index 70e3166e7c7..a2cb6ce1a22 100644 --- a/src/filter/delimit_fraction/test.py +++ b/src/filter/delimit_fraction/test.py @@ -4,6 +4,7 @@ import pytest import numpy as np from subprocess import CalledProcessError +from openpipelinetestutils.asserters import assert_annotation_objects_equal ## VIASH START meta = { @@ -14,24 +15,8 @@ ## VIASH END -from openpipelinetestutils.asserters import assert_annotation_objects_equal - -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/filter/do_filter/script.py b/src/filter/do_filter/script.py index e1a2945a2ac..746b18fe22a 100644 --- a/src/filter/do_filter/script.py +++ 
b/src/filter/do_filter/script.py @@ -19,22 +19,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading %s", par['input']) diff --git a/src/filter/filter_with_counts/script.py b/src/filter/filter_with_counts/script.py index 6e9e15f0ec6..bd7c71e5eb7 100644 --- a/src/filter/filter_with_counts/script.py +++ b/src/filter/filter_with_counts/script.py @@ -26,22 +26,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading input data") diff --git a/src/filter/filter_with_counts/test.py b/src/filter/filter_with_counts/test.py index ca362043b40..77b32414181 100644 --- a/src/filter/filter_with_counts/test.py +++ b/src/filter/filter_with_counts/test.py @@ -13,22 +13,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START 
TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() diff --git a/src/filter/filter_with_scrublet/script.py b/src/filter/filter_with_scrublet/script.py index 6a4c6aaa322..f0c92906c07 100644 --- a/src/filter/filter_with_scrublet/script.py +++ b/src/filter/filter_with_scrublet/script.py @@ -30,22 +30,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading %s.", par['input']) diff --git a/src/filter/intersect_obs/config.vsh.yaml b/src/filter/intersect_obs/config.vsh.yaml index b70bcda4fd3..98bde5dbb3a 100644 --- a/src/filter/intersect_obs/config.vsh.yaml +++ b/src/filter/intersect_obs/config.vsh.yaml @@ -42,6 +42,7 @@ resources: - type: python_script path: script.py - path: /src/utils/setup_logger.py + - path: /src/utils/compress_h5mu.py test_resources: - type: python_script diff --git 
a/src/filter/intersect_obs/script.py b/src/filter/intersect_obs/script.py index a04dc685bf3..d99ae738079 100644 --- a/src/filter/intersect_obs/script.py +++ b/src/filter/intersect_obs/script.py @@ -16,76 +16,8 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger - -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion - -# from compress_h5mu import compress_h5mu -from h5py import Group, Dataset -from h5py import File as H5File -from typing import Union, Literal -from functools import partial -from pathlib import Path - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - 
copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu +from setup_logger import setup_logger +from compress_h5mu import compress_h5mu logger = setup_logger() diff --git a/src/filter/subset_obsp/script.py b/src/filter/subset_obsp/script.py index 71c9d481131..ccd21978779 100644 --- a/src/filter/subset_obsp/script.py +++ b/src/filter/subset_obsp/script.py @@ -1,3 +1,4 @@ +import sys import mudata as mu ### VIASH START @@ -12,23 +13,8 @@ 'output_compression': None, } ### VIASH END - -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger 
+sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() def main(): diff --git a/src/integrate/scarches/script.py b/src/integrate/scarches/script.py index 2445f4511a2..e6544250429 100644 --- a/src/integrate/scarches/script.py +++ b/src/integrate/scarches/script.py @@ -28,22 +28,7 @@ def mps_is_available(): ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() def _read_model_name_from_registry(model_path) -> str: diff --git a/src/integrate/scvi/script.py b/src/integrate/scvi/script.py index d3eb914caa5..6b10b3518f5 100644 --- a/src/integrate/scvi/script.py +++ b/src/integrate/scvi/script.py @@ -33,13 +33,7 @@ import sys sys.path.append(meta['resources_dir']) -# START TEMPORARY WORKAROUND subset_vars -# reason: resources aren't available when using Nextflow fusion -# from subset_vars import subset_vars -def subset_vars(adata, subset_col): - return adata[:, adata.var[subset_col]].copy() - -# END TEMPORARY WORKAROUND subset_vars +from subset_vars import subset_vars #TODO: optionally, move to qa # https://github.com/openpipelines-bio/openpipeline/issues/435 diff --git a/src/integrate/scvi/test.py b/src/integrate/scvi/test.py index 7e35dd77c41..7fa7c1251e3 100644 --- a/src/integrate/scvi/test.py +++ b/src/integrate/scvi/test.py @@ -15,13 +15,7 @@ import sys sys.path.append(meta['resources_dir']) -# START TEMPORARY WORKAROUND 
subset_vars -# reason: resources aren't available when using Nextflow fusion -# from subset_vars import subset_vars -def subset_vars(adata, subset_col): - return adata[:, adata.var[subset_col]].copy() - -# END TEMPORARY WORKAROUND subset_vars +from subset_vars import subset_vars input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3_mms.h5mu" diff --git a/src/integrate/totalvi/script.py b/src/integrate/totalvi/script.py index 20ae2043a60..5ba6507b54b 100644 --- a/src/integrate/totalvi/script.py +++ b/src/integrate/totalvi/script.py @@ -35,22 +35,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() def align_proteins_names(adata_reference: AnnData, mdata_query: MuData, adata_query: AnnData, reference_proteins_key: str, query_proteins_key: str) -> AnnData: diff --git a/src/labels_transfer/xgboost/script.py b/src/labels_transfer/xgboost/script.py index 7306975c566..6296530555a 100644 --- a/src/labels_transfer/xgboost/script.py +++ b/src/labels_transfer/xgboost/script.py @@ -52,22 +52,7 @@ sys.path.append(meta["resources_dir"]) from helper import check_arguments, get_reference_features, get_query_features -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - 
logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() # read config arguments diff --git a/src/mapping/cellranger_atac_count/test.py b/src/mapping/cellranger_atac_count/test.py index 13e80139de6..80743232dac 100644 --- a/src/mapping/cellranger_atac_count/test.py +++ b/src/mapping/cellranger_atac_count/test.py @@ -11,22 +11,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("> Running command with folder") diff --git a/src/mapping/cellranger_multi/script.py b/src/mapping/cellranger_multi/script.py index b9f6d0b0dd0..953a10d5ae2 100644 --- a/src/mapping/cellranger_multi/script.py +++ b/src/mapping/cellranger_multi/script.py @@ -62,22 +62,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = 
logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() # Tested with cellranger 7.0: diff --git a/src/mapping/star_align/test.py b/src/mapping/star_align/test.py index 2956c1cc905..1fd57d46c16 100644 --- a/src/mapping/star_align/test.py +++ b/src/mapping/star_align/test.py @@ -13,22 +13,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() ## Test 1: use input dir diff --git a/src/mapping/star_align_v273a/test.py b/src/mapping/star_align_v273a/test.py index 32697721171..5a140c7c305 100644 --- a/src/mapping/star_align_v273a/test.py +++ b/src/mapping/star_align_v273a/test.py @@ -13,22 +13,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY 
WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() ## Test 1: use input dir diff --git a/src/metadata/add_id/script.py b/src/metadata/add_id/script.py index af8581065e4..f80b86482ae 100644 --- a/src/metadata/add_id/script.py +++ b/src/metadata/add_id/script.py @@ -12,22 +12,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() def make_observation_keys_unique(sample_id: str, sample: MuData) -> None: diff --git a/src/metadata/duplicate_obs/script.py b/src/metadata/duplicate_obs/script.py index a976126e2a4..c79ac63b4df 100644 --- a/src/metadata/duplicate_obs/script.py +++ b/src/metadata/duplicate_obs/script.py @@ -17,22 +17,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Read mudata from file") diff --git 
a/src/metadata/duplicate_var/script.py b/src/metadata/duplicate_var/script.py index 5b436368ca0..1be01e2881a 100644 --- a/src/metadata/duplicate_var/script.py +++ b/src/metadata/duplicate_var/script.py @@ -17,22 +17,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Read mudata from file") diff --git a/src/metadata/grep_annotation_column/config.vsh.yaml b/src/metadata/grep_annotation_column/config.vsh.yaml index 7b8a5ccbff3..61f586eb2c7 100644 --- a/src/metadata/grep_annotation_column/config.vsh.yaml +++ b/src/metadata/grep_annotation_column/config.vsh.yaml @@ -73,6 +73,7 @@ argument_groups: resources: - type: python_script path: script.py + - path: /src/utils/setup_logger.py test_resources: - type: python_script path: test.py diff --git a/src/metadata/grep_annotation_column/script.py b/src/metadata/grep_annotation_column/script.py index bb493de1bbf..20573577447 100644 --- a/src/metadata/grep_annotation_column/script.py +++ b/src/metadata/grep_annotation_column/script.py @@ -1,3 +1,4 @@ +import sys import mudata as mu from pathlib import Path from operator import attrgetter @@ -21,23 +22,8 @@ "output_compression": "gzip" } ### VIASH END - -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - 
- logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() def describe_array(arr, msg): diff --git a/src/metadata/join_csv/script.py b/src/metadata/join_csv/script.py index 6c923533a27..58250a4916e 100644 --- a/src/metadata/join_csv/script.py +++ b/src/metadata/join_csv/script.py @@ -15,22 +15,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() if par["obs_key"] and par["var_key"]: diff --git a/src/metadata/join_uns_to_obs/script.py b/src/metadata/join_uns_to_obs/script.py index fbba9b78da7..49451e8c758 100644 --- a/src/metadata/join_uns_to_obs/script.py +++ b/src/metadata/join_uns_to_obs/script.py @@ -12,22 +12,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - 
logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Read mudata from file") diff --git a/src/metadata/move_obsm_to_obs/script.py b/src/metadata/move_obsm_to_obs/script.py index f0768a4cd96..7946afc3766 100644 --- a/src/metadata/move_obsm_to_obs/script.py +++ b/src/metadata/move_obsm_to_obs/script.py @@ -14,22 +14,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Read mudata from file") diff --git a/src/neighbors/find_neighbors/script.py b/src/neighbors/find_neighbors/script.py index 6c2cd9d30e9..059256de88b 100644 --- a/src/neighbors/find_neighbors/script.py +++ b/src/neighbors/find_neighbors/script.py @@ -27,22 +27,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - 
console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading input mudata") diff --git a/src/qc/calculate_qc_metrics/script.py b/src/qc/calculate_qc_metrics/script.py index 02c730e7b3f..aedfc8d352b 100644 --- a/src/qc/calculate_qc_metrics/script.py +++ b/src/qc/calculate_qc_metrics/script.py @@ -24,22 +24,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() def count_nonzero(layer, axis): diff --git a/src/query/cellxgene_census/script.py b/src/query/cellxgene_census/script.py index d4ccf3918cf..7cea7db325a 100644 --- a/src/query/cellxgene_census/script.py +++ b/src/query/cellxgene_census/script.py @@ -21,24 +21,7 @@ sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger - - -# END TEMPORARY WORKAROUND setup_logger +from 
setup_logger import setup_logger logger = setup_logger() diff --git a/src/scgpt/binning/config.vsh.yaml b/src/scgpt/binning/config.vsh.yaml index 03ecf43d2a5..65058307e26 100644 --- a/src/scgpt/binning/config.vsh.yaml +++ b/src/scgpt/binning/config.vsh.yaml @@ -66,6 +66,7 @@ argument_groups: resources: - type: python_script path: script.py + - path: /src/utils/setup_logger.py test_resources: - type: python_script path: test.py diff --git a/src/scgpt/binning/script.py b/src/scgpt/binning/script.py index c1f3a26d133..c273055c7be 100644 --- a/src/scgpt/binning/script.py +++ b/src/scgpt/binning/script.py @@ -1,3 +1,4 @@ +import sys import mudata as mu import numpy as np from scipy.sparse import csr_matrix @@ -18,22 +19,8 @@ if par["seed"]: np.random.seed(par["seed"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading in data") diff --git a/src/scgpt/cell_type_annotation/script.py b/src/scgpt/cell_type_annotation/script.py index 16e0205f336..df2bd68c13a 100644 --- a/src/scgpt/cell_type_annotation/script.py +++ b/src/scgpt/cell_type_annotation/script.py @@ -1,3 +1,4 @@ +import sys import json import os import mudata as mu @@ -37,22 +38,8 @@ ## VIASH END -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - 
- logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() class SeqDataset(Dataset): diff --git a/src/scgpt/cross_check_genes/script.py b/src/scgpt/cross_check_genes/script.py index 14a668a1d3d..4214a2fa4e6 100644 --- a/src/scgpt/cross_check_genes/script.py +++ b/src/scgpt/cross_check_genes/script.py @@ -1,3 +1,4 @@ +import sys import mudata as mu import numpy as np from scgpt.tokenizer.gene_tokenizer import GeneVocab @@ -13,23 +14,10 @@ } ## VIASH END -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() + # Read in data logger.info(f"Reading {par['input']}") mudata = mu.read_h5mu(par["input"]) diff --git a/src/scgpt/embedding/config.vsh.yaml b/src/scgpt/embedding/config.vsh.yaml index 82f5c30291e..20de97b5fd7 100644 --- a/src/scgpt/embedding/config.vsh.yaml +++ b/src/scgpt/embedding/config.vsh.yaml @@ -128,6 +128,7 @@ argument_groups: resources: - type: python_script path: script.py + - path: /src/utils/setup_logger.py test_resources: - type: python_script path: test.py diff --git 
a/src/scgpt/embedding/script.py b/src/scgpt/embedding/script.py index c2fe5181cd7..69abb52deda 100644 --- a/src/scgpt/embedding/script.py +++ b/src/scgpt/embedding/script.py @@ -1,3 +1,4 @@ +import sys import numpy as np import mudata as mu import json @@ -28,22 +29,8 @@ } ## VIASH END -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() logger.info(f"Setting device to {'cuda' if torch.cuda.is_available() else 'cpu'}") diff --git a/src/scgpt/pad_tokenize/config.vsh.yaml b/src/scgpt/pad_tokenize/config.vsh.yaml index 2653271331b..6f662ba9c05 100644 --- a/src/scgpt/pad_tokenize/config.vsh.yaml +++ b/src/scgpt/pad_tokenize/config.vsh.yaml @@ -97,6 +97,7 @@ argument_groups: resources: - type: python_script path: script.py + - path: /src/utils/setup_logger.py test_resources: - type: python_script path: test.py diff --git a/src/scgpt/pad_tokenize/script.py b/src/scgpt/pad_tokenize/script.py index ba3885681e4..c9422edc40e 100644 --- a/src/scgpt/pad_tokenize/script.py +++ b/src/scgpt/pad_tokenize/script.py @@ -1,3 +1,4 @@ +import sys import mudata as mu import numpy as np from scipy.sparse import issparse @@ -23,22 +24,8 @@ } ## VIASH END -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - 
logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading in data") diff --git a/src/transform/delete_layer/script.py b/src/transform/delete_layer/script.py index e709c5f4263..2dc7dbb29e0 100644 --- a/src/transform/delete_layer/script.py +++ b/src/transform/delete_layer/script.py @@ -20,78 +20,11 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout +from setup_logger import setup_logger +from compress_h5mu import compress_h5mu - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger logger = setup_logger() -# START TEMPORARY WORKAROUND compress_h5mu -# reason: resources aren't available when using Nextflow fusion -# from compress_h5mu import compress_h5mu -from h5py import File as H5File -from h5py import Group, Dataset -from pathlib import Path -from typing import Union, Literal -from functools import partial - - -def compress_h5mu(input_path: Union[str, Path], - output_path: Union[str, Path], - compression: Union[Literal['gzip'], Literal['lzf']]): - input_path, output_path = str(input_path), str(output_path) - - def copy_attributes(in_object, out_object): - for key, value in in_object.attrs.items(): - out_object.attrs[key] = 
value - - def visit_path(output_h5: H5File, - compression: Union[Literal['gzip'], Literal['lzf']], - name: str, object: Union[Group, Dataset]): - if isinstance(object, Group): - new_group = output_h5.create_group(name) - copy_attributes(object, new_group) - elif isinstance(object, Dataset): - # Compression only works for non-scalar Dataset objects - # Scalar objects dont have a shape defined - if not object.compression and object.shape not in [None, ()]: - new_dataset = output_h5.create_dataset(name, data=object, compression=compression) - copy_attributes(object, new_dataset) - else: - output_h5.copy(object, name) - else: - raise NotImplementedError(f"Could not copy element {name}, " - f"type has not been implemented yet: {type(object)}") - - with H5File(input_path, 'r') as input_h5, H5File(output_path, 'w', userblock_size=512) as output_h5: - copy_attributes(input_h5, output_h5) - input_h5.visititems(partial(visit_path, output_h5, compression)) - - with open(input_path, "rb") as input_bytes: - # Mudata puts metadata like this in the first 512 bytes: - # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0) - # See mudata/_core/io.py, read_h5mu() function - starting_metadata = input_bytes.read(100) - # The metadata is padded with extra null bytes up until 512 bytes - truncate_location = starting_metadata.find(b"\x00") - starting_metadata = starting_metadata[:truncate_location] - with open(output_path, "br+") as f: - nbytes = f.write(starting_metadata) - f.write(b"\0" * (512 - nbytes)) -# END TEMPORARY WORKAROUND compress_h5mu - def main(): input_file, output_file, mod_name = Path(par["input"]), Path(par["output"]), par['modality'] diff --git a/src/transform/log1p/script.py b/src/transform/log1p/script.py index b097b5e6d02..bc29cff87d2 100644 --- a/src/transform/log1p/script.py +++ b/src/transform/log1p/script.py @@ -15,22 +15,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't 
available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading input mudata") diff --git a/src/transform/move_layer/script.py b/src/transform/move_layer/script.py index d8366d1b157..a791b8c3e32 100644 --- a/src/transform/move_layer/script.py +++ b/src/transform/move_layer/script.py @@ -19,22 +19,7 @@ ### VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Read mudata from file") diff --git a/src/transform/normalize_total/script.py b/src/transform/normalize_total/script.py index eceaac02c77..4da8b8605fe 100644 --- a/src/transform/normalize_total/script.py +++ b/src/transform/normalize_total/script.py @@ -14,22 +14,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import 
stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading input mudata") diff --git a/src/transform/regress_out/script.py b/src/transform/regress_out/script.py index be1543cd3a3..f1bb9244ffe 100644 --- a/src/transform/regress_out/script.py +++ b/src/transform/regress_out/script.py @@ -15,22 +15,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading input mudata") diff --git a/src/transform/scale/script.py b/src/transform/scale/script.py index 90d45869523..9a91036707d 100644 --- a/src/transform/scale/script.py +++ b/src/transform/scale/script.py @@ -15,22 +15,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s 
%(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() def main(): diff --git a/src/transform/tfidf/script.py b/src/transform/tfidf/script.py index 509053a4f12..2c4a89fe369 100644 --- a/src/transform/tfidf/script.py +++ b/src/transform/tfidf/script.py @@ -19,22 +19,7 @@ ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import setup_logger logger = setup_logger() logger.info("Reading input mudata") diff --git a/src/velocity/scvelo/script.py b/src/velocity/scvelo/script.py index f6d5354023d..cb632ca184e 100644 --- a/src/velocity/scvelo/script.py +++ b/src/velocity/scvelo/script.py @@ -34,22 +34,7 @@ def none_factory(): ## VIASH END sys.path.append(meta["resources_dir"]) -# START TEMPORARY WORKAROUND setup_logger -# reason: resources aren't available when using Nextflow fusion -# from setup_logger import setup_logger -def setup_logger(): - import logging - from sys import stdout - - logger = logging.getLogger() - logger.setLevel(logging.INFO) - console_handler = logging.StreamHandler(stdout) - logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") - console_handler.setFormatter(logFormatter) - logger.addHandler(console_handler) - - return logger -# END TEMPORARY WORKAROUND setup_logger +from setup_logger import 
setup_logger logger = setup_logger() mpl.rcParams['savefig.dpi']=150