Add GMM-Demux #5641

Closed · wants to merge 18 commits

31 changes: 19 additions & 12 deletions modules/nf-core/cellranger/count/templates/cellranger_count.py
@@ -4,11 +4,12 @@

Copyright (c) Gregor Sturm 2023 - MIT License
"""
from subprocess import run

import re
import shlex
from pathlib import Path
from subprocess import run
from textwrap import dedent
import shlex
import re


def chunk_iter(seq, size):
@@ -34,11 +35,11 @@ def chunk_iter(seq, size):
# Match R1 in the filename, but only if it is followed by a non-digit or non-character
# match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but
# do not match "SRR12345", "file_INFIXR12", etc
filename_pattern = r'([^a-zA-Z0-9])R1([^a-zA-Z0-9])'
filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])"

for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2), start=1):
# double escapes are required because nextflow processes this python 'template'
if re.sub(filename_pattern, r'\\1R2\\2', r1.name) != r2.name:
if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name:
raise AssertionError(
dedent(
f"""\
@@ -58,13 +59,19 @@ def chunk_iter(seq, size):
run(
# fmt: off
[
"cellranger", "count",
"--id", "${prefix}",
"--fastqs", str(fastq_all),
"--transcriptome", "${reference.name}",
"--localcores", "${task.cpus}",
"--localmem", "${task.memory.toGiga()}",
*shlex.split("""${args}""")
"cellranger",
"count",
"--id",
"${prefix}",
"--fastqs",
str(fastq_all),
"--transcriptome",
"${reference.name}",
"--localcores",
"${task.cpus}",
"--localmem",
"${task.memory.toGiga()}",
*shlex.split("""${args}"""),
],
# fmt: on
check=True,
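
As an aside on the filename_pattern hunk in this file: only the quoting of the pattern changes, while the pattern itself and the comment describing it stay the same. A minimal Groovy sanity check of the documented behaviour (the filenames are taken from that comment and are illustrative; the Python template additionally double-escapes the backreferences because Nextflow renders it as a template):

    // Pattern copied from the template: R1 must be flanked by non-alphanumeric characters.
    def filenamePattern = /([^a-zA-Z0-9])R1([^a-zA-Z0-9])/

    // Flanked occurrences are rewritten to R2 ...
    assert 'file_R1.fastq.gz'.replaceAll(filenamePattern, '$1R2$2') == 'file_R2.fastq.gz'
    assert 'file.R1_000.fastq.gz'.replaceAll(filenamePattern, '$1R2$2') == 'file.R2_000.fastq.gz'

    // ... while accession-style or infix occurrences are left untouched.
    assert 'SRR12345_1.fastq.gz'.replaceAll(filenamePattern, '$1R2$2') == 'SRR12345_1.fastq.gz'
    assert 'file_INFIXR12.fastq.gz'.replaceAll(filenamePattern, '$1R2$2') == 'file_INFIXR12.fastq.gz'

The same substitution is what the template uses to confirm that each R1 file is followed by its matching R2 file before handing the directory to Cell Ranger.
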
26 changes: 17 additions & 9 deletions modules/nf-core/cellranger/multi/templates/cellranger_multi.py
@@ -5,11 +5,11 @@
Copyright (c) Felipe Almeida 2024 - MIT License
"""

from subprocess import run
import re
import shlex
from pathlib import Path
from subprocess import run
from textwrap import dedent
import shlex
import re


def chunk_iter(seq, size):
@@ -36,7 +36,9 @@ def chunk_iter(seq, size):
# - ...
# Since we require fastq files in the input channel to be ordered such that a R1/R2 pair
# of files follows each other, ordering will get us a sequence of [R1, R2, R1, R2, ...]
fastqs = sorted(p for p in Path(".").glob(f"fastqs/{modality}/*/*") if p.name != EMPTY_FILE)
fastqs = sorted(
p for p in Path(".").glob(f"fastqs/{modality}/*/*") if p.name != EMPTY_FILE
)
assert len(fastqs) % 2 == 0

# target directory in which the renamed fastqs will be placed
@@ -46,7 +48,9 @@ def chunk_iter(seq, size):
for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2), start=1):
# will we rename the files or just move it with the same name to
# the 'fastq_all' directory which is where files are expected?
if "${skip_renaming}" == "true": # nf variables are true/false, which are different from Python
if (
"${skip_renaming}" == "true"
): # nf variables are true/false, which are different from Python
resolved_name_r1 = r1.name
resolved_name_r2 = r2.name

@@ -140,20 +144,24 @@ def chunk_iter(seq, size):
# check the extra data that is included
#
if len("${include_cmo}") > 0:
with open("${cmo_csv_text}", "r") as input_conf:
with open("${cmo_csv_text}") as input_conf:
config_txt = config_txt + "\\n${include_cmo}\\n" + input_conf.read() + "\\n"

if len("${include_beam}") > 0:
with open("${beam_csv_text}", "r") as input_conf, open("${beam_antigen_csv}", "r") as input_csv:
with open("${beam_csv_text}") as input_conf, open(
"${beam_antigen_csv}"
) as input_csv:
config_txt = config_txt + "\\n${include_beam}\\n" + input_conf.read() + "\\n"
config_txt = config_txt + "[feature]\\n" + input_csv.read() + "\\n"

if len("${include_frna}") > 0:
with open("${frna_csv_text}", "r") as input_conf:
with open("${frna_csv_text}") as input_conf:
config_txt = config_txt + "\\n${include_frna}\\n" + input_conf.read() + "\\n"

# Remove blank lines from config text
config_txt = "\\n".join([line for line in config_txt.split("\\n") if line.strip() != ""])
config_txt = "\\n".join(
[line for line in config_txt.split("\\n") if line.strip() != ""]
)

# Save config file
with open("${config}", "w") as file:
@@ -2,11 +2,11 @@

# Written by Pranathi Vemuri, later modified by Jonathan Manning and released under the MIT license.

import os
import logging
import os
import platform
from typing import Iterator, Tuple
from itertools import groupby
from typing import Iterator, Tuple


def setup_logging() -> logging.Logger:
@@ -3,11 +3,11 @@

"""Provide functions to merge multiple versions.yml files."""


import yaml
import platform
from textwrap import dedent

import yaml


def _make_versions_html(versions):
"""Generate a tabular HTML output of all versions for MultiQC."""
@@ -58,7 +59,9 @@ def main():
}

with open("$versions") as f:
versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
versions_by_process = (
yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
)

# aggregate versions by the module name (derived from fully-qualified process name)
versions_by_module = {}
51 changes: 36 additions & 15 deletions modules/nf-core/custom/tx2gene/templates/tx2gene.py
@@ -2,13 +2,12 @@

# Written by Lorena Pantano with subsequent reworking by Jonathan Manning. Released under the MIT license.

import logging
import argparse
import glob
import logging
import os
import platform
import re
from collections import Counter, defaultdict, OrderedDict
from collections import Counter, OrderedDict
from collections.abc import Set
from typing import Dict

@@ -17,6 +16,7 @@
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def format_yaml_like(data: dict, indent: int = 0) -> str:
"""Formats a dictionary to a YAML-like string.

@@ -36,6 +36,7 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
yaml_str += f"{spaces}{key}: {value}\\n"
return yaml_str


def read_top_transcripts(quant_dir: str, file_pattern: str) -> Set[str]:
"""
Read the top 100 transcripts from the quantification file.
@@ -50,9 +51,13 @@ def read_top_transcripts(quant_dir: str, file_pattern: str) -> Set[str]:
try:
# Find the quantification file within the directory
quant_file_path = glob.glob(os.path.join(quant_dir, "*", file_pattern))[0]
with open(quant_file_path, "r") as file_handle:
with open(quant_file_path) as file_handle:
# Read the file and extract the top 100 transcripts
return {line.split()[0] for i, line in enumerate(file_handle) if i > 0 and i <= 100}
return {
line.split()[0]
for i, line in enumerate(file_handle)
if i > 0 and i <= 100
}
except IndexError:
# Log an error and raise a FileNotFoundError if the quant file does not exist
logger.error("No quantification files found.")
@@ -81,7 +86,9 @@ def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str:
attributes_str = cols[8]
attributes = dict(re.findall(r'(\\S+) "(.*?)(?<!\\\\)";', attributes_str))

votes.update(key for key, value in attributes.items() if value in transcripts)
votes.update(
key for key, value in attributes.items() if value in transcripts
)

if not votes:
# Error out if no matching attribute is found
@@ -123,7 +130,12 @@ def parse_attributes(attributes_text: str) -> Dict[str, str]:


def map_transcripts_to_gene(
quant_type: str, gtf_file: str, quant_dir: str, gene_id: str, extra_id_field: str, output_file: str
quant_type: str,
gtf_file: str,
quant_dir: str,
gene_id: str,
extra_id_field: str,
output_file: str,
) -> bool:
"""
Map transcripts to gene names and write the output to a file.
@@ -140,7 +152,9 @@ def map_transcripts_to_gene(
bool: True if the operation was successful, False otherwise.
"""
# Read the top transcripts based on quantification type
transcripts = read_top_transcripts(quant_dir, "quant.sf" if quant_type == "salmon" else "abundance.tsv")
transcripts = read_top_transcripts(
quant_dir, "quant.sf" if quant_type == "salmon" else "abundance.tsv"
)
# Discover the attribute that corresponds to transcripts in the GTF
transcript_attribute = discover_transcript_attribute(gtf_file, transcripts)

@@ -156,28 +170,35 @@ def map_transcripts_to_gene(
attr_dict = parse_attributes(cols[8])
if gene_id in attr_dict and transcript_attribute in attr_dict:
# Create a unique identifier for the transcript-gene combination
transcript_gene_pair = (attr_dict[transcript_attribute], attr_dict[gene_id])
transcript_gene_pair = (
attr_dict[transcript_attribute],
attr_dict[gene_id],
)

# Check if the combination has already been seen
if transcript_gene_pair not in seen:
# If it's a new combination, write it to the output and add to the seen set
extra_id = attr_dict.get(extra_id_field, attr_dict[gene_id])
output_handle.write(f"{attr_dict[transcript_attribute]}\\t{attr_dict[gene_id]}\\t{extra_id}\\n")
output_handle.write(
f"{attr_dict[transcript_attribute]}\\t{attr_dict[gene_id]}\\t{extra_id}\\n"
)
seen.add(transcript_gene_pair)

return True


# Main function to parse arguments and call the mapping function
if __name__ == "__main__":
if '${task.ext.prefix}' != "null":
if "${task.ext.prefix}" != "null":
prefix = "${task.ext.prefix}."
elif '$meta.id' != "null":
prefix = '${meta.id}.'
elif "$meta.id" != "null":
prefix = "${meta.id}."
else:
prefix = ''
prefix = ""

if not map_transcripts_to_gene('$quant_type', '$gtf', 'quants', '$id', '$extra', f"{prefix}tx2gene.tsv"):
if not map_transcripts_to_gene(
"$quant_type", "$gtf", "quants", "$id", "$extra", f"{prefix}tx2gene.tsv"
):
logger.error("Failed to map transcripts to genes.")

# Write the versions
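
For context on the prefix handling in this template: in nf-core pipelines task.ext.prefix is normally supplied from the pipeline configuration. A minimal sketch of such a config entry, assuming the process is addressed by a selector like CUSTOM_TX2GENE (the selector name and the suffix are illustrative, not part of this PR):

    process {
        withName: 'CUSTOM_TX2GENE' {
            // Resolved per task; the template appends the trailing dot itself.
            ext.prefix = { "${meta.id}.annotated" }
        }
    }

When no ext.prefix is configured, the template falls back to "${meta.id}." and finally to an empty prefix.
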
9 changes: 9 additions & 0 deletions modules/nf-core/gmmdemux/environment.yml
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
name: "gmmdemux"
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- "bioconda::gmm-demux=0.2.2.3"
67 changes: 67 additions & 0 deletions modules/nf-core/gmmdemux/main.nf
@@ -0,0 +1,67 @@

process GMMDEMUX {
tag "$meta.id"
label 'process_low'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gmm-demux:0.2.2.3--pyh7cba7a3_0':
'biocontainers/gmm-demux:0.2.2.3--pyh7cba7a3_0' }"

input:
tuple val(meta), path(cell_hashing_barcodes,stageAs: "hto_files/*"), path(cell_hashing_matrix,stageAs: "hto_files/*"),path(cell_hashing_features,stageAs: "hto_files/*"),val(hto_names)
val type_report
val summary_report
val skip

output:
tuple val(meta), path("${meta.id}/barcodes.tsv.gz" ), emit: barcodes
tuple val(meta), path("${meta.id}/*.mtx.gz" ), emit: matrix
tuple val(meta), path("${meta.id}/features.tsv.gz" ), emit: features
tuple val(meta), path("${meta.id}/classification_report_${meta.id}" ), emit: classification_report
tuple val(meta), path("summary_report_${meta.id}.txt" ), emit: summary_report, optional: true
path "versions.yml" , emit: versions
Comment on lines +18 to +23 (@tstoeriko, Contributor, Jun 5, 2024):

Suggested change:

    tuple val(meta), path("${prefix}/barcodes.tsv.gz") , emit: barcodes
    tuple val(meta), path("${prefix}/*.mtx.gz") , emit: matrix
    tuple val(meta), path("${prefix}/features.tsv.gz") , emit: features
    tuple val(meta), path("${prefix}/classification_report_${prefix}"), emit: classification_report
    tuple val(meta), path("summary_report_${prefix}.txt") , emit: summary_report, optional: true
    path "versions.yml" , emit: versions


when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def type_report = type_report ? "-f test/classification_report_${prefix}" : "-s test/classification_report_${prefix}"
Review comment (Contributor):

Suggested change:

    def type_report = type_report ? "-f ${prefix}/classification_report_${prefix}" : "-s ${prefix}/classification_report_${prefix}"

def skip = skip ? "--skip $skip" : ""
def summary_rep = summary_report ? "-r summary_report_${prefix}.txt" : ""
def VERSION = '0.2.2.3' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
"""
if [[ ${summary_report} == true ]]; then
cat /dev/null > summary_report_${prefix}.txt
echo "summary report file created"
fi
Comment on lines +36 to +39 (Contributor):

Is this necessary because the tool itself does not create the summary report file?

GMM-demux $args \\
$type_report \\
$summary_rep \\
$skip \\
hto_files \\
$hto_names
cat <<-END_VERSIONS > versions.yml
"${task.process}":
GMM-Demux: $VERSION
END_VERSIONS
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '0.2.2.3'
"""
mkdir test
touch test/barcodes.tsv.gz
touch test/features.tsv.gz
touch test/matrix.mtx.gz
Comment on lines +56 to +59 (Contributor):

Suggested change:

    touch barcodes.tsv.gz
    touch features.tsv.gz
    touch matrix.mtx.gz

Reply (Contributor Author):

I tried with the suggested lines, but it crashes the test for stub.

Reply (@tstoeriko, Contributor, Jun 5, 2024):

I think the paths just need to be updated to match the new output patterns, then it should hopefully work.

Suggested change:

    mkdir "${prefix}"
    touch "${prefix}/barcodes.tsv.gz"
    touch "${prefix}/features.tsv.gz"
    touch "${prefix}/matrix.mtx.gz"
    touch "${prefix}/classification_report_${prefix}"
    touch "summary_report_${prefix}.txt"

touch test/classification_report_test

cat <<-END_VERSIONS > versions.yml
"${task.process}":
GMM-Demux: $VERSION
END_VERSIONS
"""
}
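
To round off the new module, a minimal sketch of how GMMDEMUX might be wired into a workflow, based only on the input and output declarations above. The include path follows the usual nf-core module layout; the meta map, file paths, and HTO names are illustrative placeholders rather than values from this PR:

    include { GMMDEMUX } from './modules/nf-core/gmmdemux/main'

    workflow {
        // One emission per sample, matching the declared input tuple:
        // meta map, HTO barcodes/matrix/features files, and the HTO names value.
        ch_hto = Channel.of([
            [ id: 'sample1' ],                // meta (illustrative)
            file('hto_demo/barcodes.tsv.gz'), // cell_hashing_barcodes (illustrative path)
            file('hto_demo/matrix.mtx.gz'),   // cell_hashing_matrix (illustrative path)
            file('hto_demo/features.tsv.gz'), // cell_hashing_features (illustrative path)
            'HTO_1,HTO_2'                     // hto_names (illustrative)
        ])

        GMMDEMUX(
            ch_hto,
            true, // type_report: truthy selects -f, falsy selects -s in the script block above
            true, // summary_report: also create summary_report_<prefix>.txt
            []    // skip: empty, so no --skip argument is added
        )

        GMMDEMUX.out.classification_report.view()
    }

Any additional GMM-Demux command-line options would go through task.ext.args, as in other nf-core modules.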