Skip to content

Commit

Permalink
Seacr ks (#965)
Browse files Browse the repository at this point in the history
* chip peak calling seacr

* seacr with spikein

* pytest seacr

* updated rtd sphix search extention version

* ruamel - fix ChIPtests

* include a ChIP run without samplesheet

---------

Co-authored-by: katarzyna.otylia.sikora@gmail.com <sikora@maximus.ie-freiburg.mpg.de>
Co-authored-by: WardDeb <ward@deboutte.be>
  • Loading branch information
3 people committed Jan 23, 2024
1 parent b3316e9 commit d430504
Show file tree
Hide file tree
Showing 18 changed files with 1,096 additions and 68 deletions.
19 changes: 18 additions & 1 deletion .ci_stuff/test_dag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ WC=`DNA-mapping -m allelic-mapping -i PE_input -o output --snakemakeOptions " --
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2466 ]; then exit 1 ; fi

# ChIP-seq
WC=`ChIP-seq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 407 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 399 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 630 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_broad_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
Expand All @@ -192,10 +196,19 @@ WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakema
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 628 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --bigWigType log2ratio .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 562 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 759 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --useSpikeInForNorm .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1203 ]; then exit 1 ; fi
#noInput
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 403 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample__noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 349 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 469 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --useSpikeInForNorm .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 696 ]; then exit 1 ; fi
# fromBAM
WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1187 ]; then exit 1 ; fi
Expand Down Expand Up @@ -329,6 +342,8 @@ WC=`scRNAseq -i PE_input -o output --mode Alevin --skipVelocyto --snakemakeOptio
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 641 ]; then exit 1 ; fi

# WGBS
WC=`WGBS -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1430 ]; then exit 1 ; fi
WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1475 ]; then exit 1 ; fi
WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --aligner bwameth2 --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
Expand All @@ -343,6 +358,8 @@ WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sample
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 559 ]; then exit 1 ; fi

# ATAC-seq
WC=`ATAC-seq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 524 ]; then exit 1 ; fi
WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 686 ]; then exit 1 ; fi
WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
Expand Down
2 changes: 2 additions & 0 deletions docs/content/News.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ snakePipes x.x.x
* Snakemake options in the defaults.yaml are now an empty string. The required arguments '--use-conda --conda-prefix' have been directly added to the command string. condaEnvDir is parsed from defaults.yaml, requiring running snakePipes config first.
* Added a 'three-prime-seq' mode to mRNAseq (David Koppstein and Katarzyna Sikora).
* Added support for multiple comparison groups to ChIPseq and ATAC-seq.
* Added SEACR as an optional peak caller to ChIPseq.
* Fixes #819
* Fixes #947
* Fixes #945
* Fixes #941
* Fixes #951
* fastq files are checked for validity
* an 'on success' file is touched in the output directory when a workflow is finished successfully
* fuzzywuzzy deprecated in favor for thefuzz
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
sphinx==4.2.0
sphinx_rtd_theme==1.0.0
readthedocs-sphinx-search==0.1.1
readthedocs-sphinx-search==0.3.2
sphinx-argparse
mock
3 changes: 2 additions & 1 deletion snakePipes/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def set_env_yamls():
'CONDA_PREPROCESSING_ENV': 'envs/preprocessing.yaml',
'CONDA_NONCODING_RNASEQ_ENV': 'envs/noncoding.yaml',
'CONDA_SAMBAMBA_ENV': 'envs/sambamba.yaml',
'CONDA_pysam_ENV': 'envs/pysam.yaml'}
'CONDA_pysam_ENV': 'envs/pysam.yaml',
'CONDA_SEACR_ENV': 'envs/chip_seacr.yaml'}


def merge_dicts(x, y):
Expand Down
5 changes: 5 additions & 0 deletions snakePipes/shared/rules/CSAW.multiComp.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ def getInputPeaks(peakCaller, chip_samples, genrichDict,comp_group):
return expand("MACS2/{chip_sample}.filtered.BAM_peaks.xls", chip_sample = chip_samples)
elif peakCaller == "HMMRATAC":
return expand("HMMRATAC/{chip_sample}_peaks.gappedPeak", chip_sample = chip_samples)
elif peakCaller == "SEACR":
if pipeline == "chip-seq" and useSpikeInForNorm:
return expand("SEACR/{chip_sample}_host.stringent.bed",chip_sample=chip_samples)
elif pipeline == "chip-seq" and not useSpikeInForNorm:
return expand("SEACR/{chip_sample}.filtered.stringent.bed",chip_sample=chip_samples)
else:
return expand("Genrich/{genrichGroup}.{{compGroup}}.narrowPeak", genrichGroup = genrichDict[comp_group].keys())

Expand Down
5 changes: 5 additions & 0 deletions snakePipes/shared/rules/CSAW.singleComp.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ def getInputPeaks(peakCaller, chip_samples, genrichDict):
return expand("MACS2/{chip_sample}.filtered.BAM_peaks.xls", chip_sample = chip_samples)
elif peakCaller == "HMMRATAC":
return expand("HMMRATAC/{chip_sample}_peaks.gappedPeak", chip_sample = chip_samples)
elif peakCaller == "SEACR":
if pipeline == "chip-seq" and useSpikeInForNorm:
return expand("SEACR/{chip_sample}_host.stringent.bed",chip_sample=chip_samples)
elif pipeline == "chip-seq" and not useSpikeInForNorm:
return expand("SEACR/{chip_sample}.filtered.stringent.bed",chip_sample=chip_samples)
else:
return expand("Genrich/{genrichGroup}.narrowPeak", genrichGroup = genrichDict.keys())

Expand Down
32 changes: 31 additions & 1 deletion snakePipes/shared/rules/ChIP_peak_calling.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ rule MACS2_peak_qc:

# TODO
# add joined deepTools plotEnrichment call for all peaks and samples in one plot

rule namesort_bams:
input:
bam = "filtered_bam/{sample}.filtered.bam"
Expand Down Expand Up @@ -230,3 +229,34 @@ else:
shell: """
Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} -w {params.frag_size} 2> {log}
"""


rule prep_bedgraph:
input: "filtered_bam/{sample}.namesorted.bam"
output: temp("filtered_bedgraph/{sample}.fragments.bedgraph")
log: "filtered_bedgraph/log/{sample}.log"
params:
sample = lambda wildcards: wildcards.sample,
genome = genome_index
conda: CONDA_RNASEQ_ENV
shell: """
bedtools bamtobed -bedpe -i {input} | awk '$1==$4 && $6-$2 < 1000 {{print $0}}' - | cut -f 1,2,6 - | sort -k1,1 -k2,2n -k3,3n > filtered_bedgraph/{params.sample}.fragments.bed
bedtools genomecov -bg -i filtered_bedgraph/{params.sample}.fragments.bed -g {params.genome} > {output}
"""

rule SEACR_peaks:
input:
chip = "filtered_bedgraph/{chip_sample}.fragments.bedgraph",
control = lambda wildcards: "filtered_bedgraph/"+get_control(wildcards.chip_sample)+".fragments.bedgraph" if get_control(wildcards.chip_sample)
else []
output:
"SEACR/{chip_sample}.filtered.stringent.bed"
log: "SEACR/logs/{chip_sample}.log"
params:
fdr = fdr,
prefix = os.path.join(outdir,"SEACR/{chip_sample}.filtered"),
script=os.path.join(maindir, "shared","tools/SEACR-1.3/SEACR_1.3.sh")
conda: CONDA_SEACR_ENV
shell: """
bash {params.script} {input.chip} {input.control} {params.fdr} "norm" "stringent" {params.prefix} 2>{log}
"""
27 changes: 27 additions & 0 deletions snakePipes/shared/rules/ChIP_peak_calling_spikein.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,30 @@ else:
shell: """
Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} -e {params.spikein_chroms} -w {params.frag_size} 2> {log}
"""


rule prep_bedgraph:
input: "bamCoverage/{sample}.host_scaled.BYhost.bw"
output: temp("filtered_bedgraph/{sample}_host.fragments.bedgraph")
log: "filtered_bedgraph/log/{sample}.log"
conda: CONDA_SEACR_ENV
shell: """
bigWigToBedGraph {input} {output}
"""

rule SEACR_peaks:
input:
chip = "filtered_bedgraph/{chip_sample}_host.fragments.bedgraph",
control = lambda wildcards: "filtered_bedgraph/"+get_control(wildcards.chip_sample)+"_host.fragments.bedgraph" if get_control(wildcards.chip_sample)
else []
output:
"SEACR/{chip_sample}_host.stringent.bed"
log: "SEACR/logs/{chip_sample}.log"
params:
fdr = fdr,
prefix = os.path.join(outdir,"SEACR/{chip_sample}_host"),
script=os.path.join(maindir, "shared","tools/SEACR-1.3/SEACR_1.3.sh")
conda: CONDA_SEACR_ENV
shell: """
bash {params.script} {input.chip} {input.control} {params.fdr} "non" "stringent" {params.prefix} 2>{log}
"""
8 changes: 8 additions & 0 deletions snakePipes/shared/rules/envs/chip_seacr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
name: ChIP-SEACR-env-1.0
channels:
- mpi-ie
- conda-forge
- bioconda
dependencies:
- seacr
- ucsc-bigwigtobedgraph

0 comments on commit d430504

Please sign in to comment.