Skip to content

Commit

Permalink
Merge pull request #11 from cokelaer/main
Browse files Browse the repository at this point in the history
Update pipeline to add cellranger
  • Loading branch information
cokelaer committed Oct 24, 2023
2 parents 38c616d + bf12b92 commit 4e5e1fb
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 75 deletions.
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ Changelog
========= =======================================================================
Version Description
========= =======================================================================
1.4.0 * Implement demultiplexing of single cell ATAC seq data with
cellranger.
1.3.1 * use sequana_wrappers version in the config file
1.3.0 * use latest sequana-wrappers to benefit and graphivz apptainer
1.2.1 * Update CI action and use new sequana_pipetools v0.9.0
Expand Down
18 changes: 14 additions & 4 deletions sequana_pipelines/demultiplex/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
sequana_wrappers: "v0.15.1"

#################################################################
# bcl2fastq
# general
#
# :Parameters:
#
Expand All @@ -24,16 +24,19 @@ sequana_wrappers: "v0.15.1"
#
# if merge_all_lanes is set to True, merged all lanes. This must be used with
# NextSeq sequencers for instance.
#
# - mode must be set to bcl2fastq or cellranger_atac
general:
input_directory:
samplesheet_file:
mode: "bcl2fastq"


apptainers:
graphviz: "https://zenodo.org/record/7928262/files/graphviz_7.0.5.img"

cellranger_atac: "https://zenodo.org/record/8423332/files/cellranger_atac_2.1.0.img"

###################################################################################
# bcl2fastq
#
#
#
Expand All @@ -44,11 +47,18 @@ apptainers:
bcl2fastq:
threads: 4
barcode_mismatch: 0
samplesheet_file:
ignore_missing_bcls: true
no_bgzf_compression: true
options: ''
merge_all_lanes: true
resources:
mem: 64G


###################################################################################
# cellranger_atac
#
cellranger_atac:
options: ''
resources:
mem: 64G
91 changes: 49 additions & 42 deletions sequana_pipelines/demultiplex/demultiplex.rules
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@ configfile: "config.yaml"
# A convenient manager
manager = PipelineManagerDirectory("demultiplex", config)

# should be part of PipelineManagerDirectory
manager.wrappers = config.get("sequana_wrappers", "main")


samplesheet = config['bcl2fastq']['samplesheet_file'].strip()
samplesheet = config['general']['samplesheet_file'].strip()


if samplesheet.strip() != "":
Expand All @@ -51,16 +47,9 @@ if samplesheet.strip() != "":
sys.exit(1)


# Check the sample sheet
try:
from sequana import iem
samplesheet = iem.IEM(samplesheet)
samplesheet.validate()
except Exception as err:
log.error(err)
log.error("""Your sample sheet seems to be incorrect. Before running the pipeline
you will have to fix it. You may use 'sequana samplesheet --quick-fix'""")
sys.exit(1)
# check sample sheet for bcl2fastq only
if config["general"]["mode"] == "bcl2fastq":
shell(f"sequana samplesheet --check {samplesheet}")


rule pipeline:
Expand Down Expand Up @@ -97,36 +86,54 @@ rule plot_unknown_barcodes:
df.to_csv(output.csv)


rule check_samplesheet:
input: config['bcl2fastq']['samplesheet_file']
output: temp("ss.log")
shell:
"""
sequana samplesheet --check {input[0]} 2> {output[0]}
"""

rule bcl2fastq:
input:
valid_samplesheet="ss.log",
samplesheet=config['bcl2fastq']['samplesheet_file']
output:
"Stats/Stats.json",
params:
indir= config["general"]["input_directory"],
barcode_mismatch=config['bcl2fastq']['barcode_mismatch'],
ignore_missing_bcls=config['bcl2fastq']['ignore_missing_bcls'],
no_bgzf_compression=config['bcl2fastq']['no_bgzf_compression'],
merge_all_lanes=config['bcl2fastq']['merge_all_lanes'],
options=config['bcl2fastq']['options']
threads: config['bcl2fastq']['threads']
resources:
**config['bcl2fastq']['resources']
wrapper:
f"{manager.wrappers}/wrappers/bcl2fastq"
if config["general"]["mode"] == "bcl2fastq":

rule bcl2fastq:
input:
samplesheet=config['general']['samplesheet_file']
output:
"Stats/Stats.json",
params:
indir= config["general"]["input_directory"],
barcode_mismatch=config['bcl2fastq']['barcode_mismatch'],
ignore_missing_bcls=config['bcl2fastq']['ignore_missing_bcls'],
no_bgzf_compression=config['bcl2fastq']['no_bgzf_compression'],
merge_all_lanes=config['bcl2fastq']['merge_all_lanes'],
options=config['bcl2fastq']['options']
threads: config['bcl2fastq']['threads']
resources:
**config['bcl2fastq']['resources']
wrapper:
f"{manager.wrappers}/wrappers/bcl2fastq"

elif config["general"]["mode"] == "cellranger_atac":

rule cellranger_atac:
input:
samplesheet=config['general']['samplesheet_file']
output:
"Stats/Stats.json",
temp("TT")
params:
id="TT",
outdir=".",
indir= config["general"]["input_directory"],
container:
"https://zenodo.org/record/8423332/files/cellranger_atac_2.1.0.img"
resources:
**config['cellranger_atac']['resources']
shell:
"""
cellranger-atac mkfastq --id {params.id} --run {params.indir} --csv {input.samplesheet} \
--output-dir {params.outdir}

"""


rule plot_barplot_samples:
input: "Stats/Stats.json"
input:
"Stats/Stats.json"
output:
barplot="samples.png"
run:
Expand Down Expand Up @@ -171,7 +178,7 @@ rule dot2svg:
"""dot -Tsvg {input} -o {output}"""


localrules: rulegraph, check_samplesheet
localrules: rulegraph


onsuccess:
Expand Down
33 changes: 17 additions & 16 deletions sequana_pipelines/demultiplex/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __init__(self, prog=NAME, epilog=None):
The 'merge' choice merges all lanes. The 'none' choice do NOT merge the lanes.
For NextSeq runs, we should merge the lanes; if users demultiplex NextSeq
and set this option to none, an error is raised. If you still want to
skip the merging step, then set this option to 'none_and_force'""")
skip the merging step, then set this option to 'none_and_force'. For sc-atac seq, use merge.""")
pipeline_group.add_argument("--bcl-directory", dest="bcl_directory",
required=True, help="""Directory towards the raw BCL files. This directory should
contains files such as RunParameters.xml, RunInfo.xml """)
Expand All @@ -81,6 +81,8 @@ def __init__(self, prog=NAME, epilog=None):
self.add_argument("--mars-seq", default=False, action="store_true",
help="""Set options to --minimum-trimmed-read-length 15 --mask-short-adapter-reads 15
and do not merge lanes""")
self.add_argument("--scatac-seq", default=False, action="store_true",
help="""Set options to perform single cell ATAC demultiplexing using cellranger.""")
self.add_argument("--run", default=False, action="store_true",
help="execute the pipeline directly")

Expand Down Expand Up @@ -127,15 +129,6 @@ def main(args=None):
logger.error(f"{options.bcl_directory} file does not exists")
sys.exit(1)

# Check the sample sheet
from sequana import iem
try:
samplesheet = iem.IEM(options.samplesheet)
samplesheet.validate()
except Exception as err:
logger.critical(err)
logger.critical("""Your sample sheet seems to be incorrect. Before running the pipeline
you will have to fix it. You may use 'sequana samplesheet --quick-fix'""")

# NextSeq
runparam_1 = options.bcl_directory + os.sep + "RunParameters.xml"
Expand Down Expand Up @@ -170,12 +163,7 @@ def main(args=None):
cfg.general.input_directory = os.path.abspath(options.bcl_directory)
cfg.bcl2fastq.threads = options.threads
cfg.bcl2fastq.barcode_mismatch = options.mismatch
cfg.bcl2fastq.samplesheet_file = os.path.abspath(options.samplesheet)

from sequana.iem import IEM
ss = IEM(cfg.bcl2fastq.samplesheet_file)
ss.validate()

cfg.general.samplesheet_file = os.path.abspath(options.samplesheet)

# this is defined by the working_directory
#cfg.bcl2fastq.output_directory = "."
Expand All @@ -193,6 +181,19 @@ def main(args=None):
if options.merging_strategy in ["merge"]:
logger.warning("with --mars-seq option, the merging strategy should be none_and_force")
cfg.bcl2fastq.merge_all_lanes = False
cfg.general.mode = "bcl2fastq"
elif options.scatac_seq:
cfg.cellranger_atac.options = ""
cfg.general.mode = "cellranger_atac"
else: # All other cases with bcl2fastq
from sequana.iem import IEM
cfg.general.mode = "bcl2fastq"
try:
ss = IEM(cfg.general.samplesheet_file)
ss.validate()
except Exception as err:
logger.critical(err)
logger.critical("""Your sample sheet seems to be incorrect. Before running the pipeline you will have to fix it. You may use 'sequana samplesheet --quick-fix'""")

# finalise the command and save it; copy the snakemake. update the config
# file and save it.
Expand Down
2 changes: 2 additions & 0 deletions sequana_pipelines/demultiplex/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
- bcl2fastq
- dot
- cellranger-atac
35 changes: 24 additions & 11 deletions sequana_pipelines/demultiplex/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,32 @@

type: map
mapping:
"general":
type: map
mapping:
"input_directory":
type: str
required: False
"sequana_wrappers":
type: str

"apptainers":
"general":
type: map
mapping:
"input_directory":
type: str
required: False
"samplesheet_file":
type: str
"mode":
type: str
enum: ["bcl2fastq", "cellranger_atac"]

"apptainers":
type: any


"bcl2fastq":
"bcl2fastq":
type: map
mapping:
"threads":
type: int
"barcode_mismatch":
type: int
"samplesheet_file":
type: str
"ignore_missing_controls":
type: bool
"ignore_missing_bcls":
Expand All @@ -39,4 +45,11 @@ mapping:
type: any
required: true


"cellranger_atac":
type: map
mapping:
"options":
type: str
"resources":
type: any
required: true
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from setuptools.command.install import install

_MAJOR = 1
_MINOR = 3
_MICRO = 1
_MINOR = 4
_MICRO = 0
version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO)
release = '%d.%d' % (_MAJOR, _MINOR)

Expand Down
1 change: 1 addition & 0 deletions test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@ def test_standalone_baddies(tmp_path):
cmd = cmd.format(bcldir, directory, "wrong")
subprocess.call(cmd.split())


0 comments on commit 4e5e1fb

Please sign in to comment.