Skip to content

Commit

Permalink
Merge fad3a53 into 863bc61
Browse files Browse the repository at this point in the history
  • Loading branch information
cokelaer committed Nov 25, 2023
2 parents 863bc61 + fad3a53 commit bca0b1d
Show file tree
Hide file tree
Showing 10 changed files with 183 additions and 263 deletions.
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ Changelog
========= =======================================================================
Version Description
========= =======================================================================
1.5.0 * Uses click and new sequana_pipetools
1.4.0 * Implement demultiplexing of single cell ATAC seq data with
cellranger.
1.3.1 * use sequana_wrappers version in the config file
Expand Down
52 changes: 52 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "sequana-demultiplex"
version = "1.5.0"
description = "Pipeline that runs bcl2fastq and eases demultiplexing of Sequencing data"
authors = ["Sequana Team"]
license = "BSD-3"
repository = "https://github.com/sequana/demultiplex"
readme = "README.rst"
keywords = ["bcl2fastq", "Illumina", "sequana", "base caller", "demultiplexing"]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Education",
"Intended Audience :: End Users/Desktop",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: BSD License",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Topic :: Scientific/Engineering :: Information Analysis",
]

packages = [
{include = "sequana_pipelines"}
]


[tool.poetry.dependencies]
python = ">=3.8,<4.0"
sequana = ">=0.15.0"
sequana_pipetools = ">=0.16.1"
click-completion = "^0.5.2"


[tool.poetry.scripts]
sequana_demultiplex = "sequana_pipelines.demultiplex.main:main"


[tool.poetry.group.dev.dependencies]
black = "^23.7.0"
pytest = "^7.4.0"
mock = "^5.1.0"
pytest-mock = "^3.11.1"
pytest-cov = "^4.1.0"

2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
sequana
sequana_pipetools>=0.12.5
sequana_pipetools>=0.16
2 changes: 1 addition & 1 deletion sequana_pipelines/demultiplex/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#
##############################################################################

sequana_wrappers: "v0.15.1"
sequana_wrappers: "v23.11.18"

#################################################################
# general
Expand Down
247 changes: 121 additions & 126 deletions sequana_pipelines/demultiplex/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
#
# Copyright (c) 2016-2021 - Sequana Development Team
#
# File author(s):
# Thomas Cokelaer <thomas.cokelaer@pasteur.fr>
#
# Distributed under the terms of the 3-clause BSD license.
# The full license is in the LICENSE file, distributed with this software.
#
Expand All @@ -15,109 +12,108 @@
##############################################################################
import sys
import os
import argparse
import subprocess

from sequana_pipetools.options import *
from sequana_pipetools.misc import Colors
from sequana_pipetools.info import sequana_epilog, sequana_prolog
from sequana_pipetools import SequanaManager
import rich_click as click
import click_completion

col = Colors()
click_completion.init()

NAME = "demultiplex"

from sequana_pipetools.options import *
from sequana_pipetools import SequanaManager

class Options(argparse.ArgumentParser):
def __init__(self, prog=NAME, epilog=None):
usage = col.purple(sequana_prolog.format(**{"name": NAME}))
super(Options, self).__init__(usage=usage, prog=prog, description="",
epilog=epilog,
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

# add a new group of options to the parser
# demultiplex requires lots of memory sometimes hence the 64G options
#
so = SlurmOptions(queue="biomicspole", memory="64000", cores=16)
so.add_options(self)

# add a snakemake group of options to the parser
so = SnakemakeOptions(working_directory="fastq")
so.add_options(self)

so = GeneralOptions()
so.add_options(self)

pipeline_group = self.add_argument_group("pipeline")

pipeline_group.add_argument("--threads", dest="threads", default=4,
type=int, help="Number of threads to use during the demultiplexing. ")
pipeline_group.add_argument("--barcode-mismatch", dest="mismatch", default=0, type=int)
pipeline_group.add_argument("--merging-strategy", required=True,
dest="merging_strategy", choices=["merge", "none", "none_and_force"],
help="""Merge Lanes or not. options are : merge, none, none_and_force.
# Click context settings for this pipeline, as returned by init_click.
# The "Pipeline Specific" group lists the options declared with
# @click.option on `main` in this file, so the group matches what this
# pipeline really exposes. The previous entries (--method, --skip-multiqc)
# are not declared by any @click.option in this file.
# NOTE(review): presumably init_click forwards `groups` to rich_click's
# option grouping; confirm against sequana_pipetools.
help = init_click(
    NAME,
    groups={
        "Pipeline Specific": [
            "--threads",
            "--barcode-mismatch",
            "--merging-strategy",
            "--bcl-directory",
            "--sample-sheet",
            "--no-ignore-missing-bcls",
            "--bgzf-compression",
            "--mars-seq",
            "--scatac-seq",
        ],
    },
)


@click.command(context_settings=help)
@include_options_from(ClickSnakemakeOptions, working_directory=NAME)
@include_options_from(ClickSlurmOptions)
@include_options_from(ClickInputOptions, add_input_readtag=False)
@include_options_from(ClickGeneralOptions)
@click.option(
"--threads",
"threads",
default=4,
show_default=True,
type=click.INT,
help="Number of threads to use during the demultiplexing. ",
)
@click.option("--barcode-mismatch", "mismatch", default=0, type=click.INT, show_default=True)
@click.option(
"--merging-strategy",
"merging_strategy",
required=True,
type=click.Choice(["merge", "none", "none_and_force"]),
help="""Merge Lanes or not. options are : merge, none, none_and_force.
The 'merge' choice merges all lanes. The 'none' choice does NOT merge the lanes.
For NextSeq runs, we should merge the lanes; if users demultiplex NextSeq
and set this option to none, an error is raised. If you still want to
skip the merging step, then set this option to 'none_and_force'. For sc-atac seq, use merge.""")
pipeline_group.add_argument("--bcl-directory", dest="bcl_directory",
required=True, help="""Directory towards the raw BCL files. This directory should
contains files such as RunParameters.xml, RunInfo.xml """)
pipeline_group.add_argument("--sample-sheet", dest="samplesheet",
required=True,
default="SampleSheet.csv", help="Sample sheet filename to be used")
pipeline_group.add_argument("--no-ignore-missing-bcls",
dest="no_ignore_missing_bcls", action="store_true", default=False,
help="""In bcl2fastq, the option --ignore-missing-bcls implies that
skip the merging step, then set this option to 'none_and_force'. For sc-atac seq, use merge.""",
)
@click.option(
"--bcl-directory",
"bcl_directory",
required=True,
help="""Directory towards the raw BCL files. This directory should
contain files such as RunParameters.xml, RunInfo.xml """,
)
@click.option(
"--sample-sheet",
"samplesheet",
required=True,
default="SampleSheet.csv",
show_default=True,
help="Sample sheet filename to be used",
)
@click.option(
"--no-ignore-missing-bcls",
"no_ignore_missing_bcls",
is_flag=True,
default=False,
show_default=True,
help="""In bcl2fastq, the option --ignore-missing-bcls implies that
we assume 'N'/'#' for missing calls. In Sequana_demultiplex, we use that option
by default. If you do not want that behaviour, but the one from bcl2fastq, use
this flag(--no-ignore-missing-bcls)""")
pipeline_group.add_argument("--bgzf-compression",
dest="bgzf_compression", action="store_true", default=False,
help="""turn on BGZF compression for FASTQ files. By default,
this flag(--no-ignore-missing-bcls)""",
)
@click.option(
"--bgzf-compression",
"bgzf_compression",
is_flag=True,
show_default=False,
help="""turn on BGZF compression for FASTQ files. By default,
bcl2fastq uses this option; By default we don't. Set --bgzf-compression flag to
set it back""")
self.add_argument("--mars-seq", default=False, action="store_true",
help="""Set options to --minimum-trimmed-read-length 15 --mask-short-adapter-reads 15
and do not merge lanes""")
self.add_argument("--scatac-seq", default=False, action="store_true",
help="""Set options to perform single cell ATAC demultiplexing using cellranger.""")
self.add_argument("--run", default=False, action="store_true",
help="execute the pipeline directly")

def parse_args(self, *args):
args_list = list(*args)
if "--from-project" in args_list:
if len(args_list)>2:
msg = "WARNING [sequana]: With --from-project option, " + \
"pipeline and data-related options will be ignored."
print(col.error(msg))
for action in self._actions:
if action.required is True:
action.required = False
options = super(Options, self).parse_args(*args)
return options

def main(args=None):

if args is None:
args = sys.argv

# whatever needs to be called by all pipeline before the options parsing
from sequana_pipetools.options import before_pipeline
before_pipeline(NAME)

# option parsing including common epilog
options = Options(NAME, epilog=sequana_epilog).parse_args(args[1:])


set it back""",
)
@click.option(
"--mars-seq",
default=False,
is_flag=True,
show_default=True,
help="""Set options to --minimum-trimmed-read-length 15 --mask-short-adapter-reads 15
and do not merge lanes""",
)
@click.option(
"--scatac-seq",
default=False,
is_flag="store_true",
help="""Set options to perform single cell ATAC demultiplexing using cellranger.""",
)
def main(**options):
# the real stuff is here
manager = SequanaManager(options, NAME)
options = manager.options

# create the beginning of the command and the working directory
manager.setup()
from sequana import logger

logger.setLevel(options.level)

# ============================================== sanity checks
Expand All @@ -129,7 +125,6 @@ def main(args=None):
logger.error(f"{options.bcl_directory} file does not exists")
sys.exit(1)


# NextSeq
runparam_1 = options.bcl_directory + os.sep + "RunParameters.xml"

Expand Down Expand Up @@ -158,49 +153,49 @@ def main(args=None):
logger.warning(msg)
sys.exit(1)

if options.from_project is None:
cfg = manager.config.config
cfg.general.input_directory = os.path.abspath(options.bcl_directory)
cfg.bcl2fastq.threads = options.threads
cfg.bcl2fastq.barcode_mismatch = options.mismatch
cfg.general.samplesheet_file = os.path.abspath(options.samplesheet)

# this is defined by the working_directory
#cfg.bcl2fastq.output_directory = "."
cfg.bcl2fastq.ignore_missing_bcls = not options.no_ignore_missing_bcls
cfg.bcl2fastq.no_bgzf_compression = not options.bgzf_compression

if options.merging_strategy == "merge":
cfg.bcl2fastq.merge_all_lanes = True
elif options.merging_strategy in ["none", "none_and_force"]:
cfg = manager.config.config
cfg.general.input_directory = os.path.abspath(options.bcl_directory)
cfg.bcl2fastq.threads = options.threads
cfg.bcl2fastq.barcode_mismatch = options.mismatch
cfg.general.samplesheet_file = os.path.abspath(options.samplesheet)

# this is defined by the working_directory
# cfg.bcl2fastq.output_directory = "."
cfg.bcl2fastq.ignore_missing_bcls = not options.no_ignore_missing_bcls
cfg.bcl2fastq.no_bgzf_compression = not options.bgzf_compression

if options.merging_strategy == "merge":
cfg.bcl2fastq.merge_all_lanes = True
elif options.merging_strategy in ["none", "none_and_force"]:
cfg.bcl2fastq.merge_all_lanes = False

#
if options.mars_seq:
cfg.bcl2fastq.options = " --minimum-trimmed-read-length 15 --mask-short-adapter-reads 15 "
if options.merging_strategy in ["merge"]:
logger.warning("with --mars-seq option, the merging strategy should be none_and_force")
cfg.bcl2fastq.merge_all_lanes = False

#
if options.mars_seq:
cfg.bcl2fastq.options = " --minimum-trimmed-read-length 15 --mask-short-adapter-reads 15 "
if options.merging_strategy in ["merge"]:
logger.warning("with --mars-seq option, the merging strategy should be none_and_force")
cfg.bcl2fastq.merge_all_lanes = False
cfg.general.mode = "bcl2fastq"
elif options.scatac_seq:
cfg.cellranger_atac.options = ""
cfg.general.mode = "cellranger_atac"
else: # All other cases with bcl2fastq
from sequana.iem import IEM
cfg.general.mode = "bcl2fastq"
try:
ss = IEM(cfg.general.samplesheet_file)
ss.validate()
except Exception as err:
logger.critical(err)
logger.critical("""Your sample sheet seems to be incorrect. Before running the pipeline you will have to fix it. You may use 'sequana samplesheet --quick-fix'""")
cfg.general.mode = "bcl2fastq"
elif options.scatac_seq:
cfg.cellranger_atac.options = ""
cfg.general.mode = "cellranger_atac"
else: # All other cases with bcl2fastq
from sequana.iem import IEM

cfg.general.mode = "bcl2fastq"
try:
ss = IEM(cfg.general.samplesheet_file)
ss.validate()
except Exception as err:
logger.critical(err)
logger.critical(
"""Your sample sheet seems to be incorrect. Before running the pipeline you will have to fix it. You may use 'sequana samplesheet --quick-fix'"""
)

# finalise the command and save it; copy the snakemake. update the config
# file and save it.
manager.teardown(check_input_files=False)

if options.run:
subprocess.Popen(["sh", '{}.sh'.format(NAME)], cwd=options.workdir)

if __name__ == "__main__":
main()
15 changes: 0 additions & 15 deletions setup.cfg

This file was deleted.

Loading

0 comments on commit bca0b1d

Please sign in to comment.