Skip to content

Commit

Permalink
cut and run/tag peak calling options (#751)
Browse files Browse the repository at this point in the history
* modified peak calling options to be able to handle the cut and run data

* removed extra params

* macs2 for single end data

* small fixes for chip pipeline and started adding bowtie2 params to the dna_mapping pipeline.

* took care of empty fragment size file.

* fixed the number of runs for the chipseq dry run

* more changes in the number of lines in dry test test

* typo

* changing the number of lines dag test

* changed macs2 with spike to be able to handle the cutntag changes

* test_dag

* Update test_dag.sh

* Update test_dag.sh

* Update test_dag.sh

* genome size parameters

* bowtie2/dnamaaping

* number of lines in dag

* more fix for dag line numbers

* added the python version to the azure pipeline

* fixed the number of lines for the allelic pipeline

* auzre test on create conda env

* azure test update

* conda prefix

* flake

* more flake

* more developtment on create env python3.7

* flake

* typo

Co-authored-by: Leily Rabbani <rabbani@maximus.ie-freiburg.mpg.de>
Co-authored-by: Leily Rabbani <rabbani@pc390.ie-freiburg.mpg.de>
  • Loading branch information
3 people committed Mar 16, 2021
1 parent f5b5f75 commit b28b23a
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 92 deletions.
38 changes: 38 additions & 0 deletions .azure-pipelines/testCreateEnv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python

import subprocess as sp
import sys
import argparse

sys.path.append("..")
from snakePipes import common_functions as cf


def parse_args():
parser = argparse.ArgumentParser()
required = parser.add_argument_group('Required Arguments')
required.add_argument("-d",
dest="envs_chunk",
help="chunk of env to be used when running `snakePipes createEnvs`",
required=True)
return parser


parser = parse_args()
args = parser.parse_args()

keys = []
length = len(cf.set_env_yamls().items())
index = length / 3
index = round(index)
if args.envs_chunk == str(1):
envs = range(0, index)
elif args.envs_chunk == str(2):
envs = range(index, 2 * index)
elif args.envs_chunk == str(3):
envs = range(2 * index, length)

for i in range(index):
keys.append(list(cf.set_env_yamls())[i])
envs_to_test = " ".join(keys)
sp.check_output("snakePipes createEnvs --only {} --force".format(envs_to_test), shell=True)
22 changes: 11 additions & 11 deletions .ci_stuff/test_dag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ touch SE_input/sample1_R1.fastq.gz \
SE_input/sample5_R1.fastq.gz \
SE_input/sample6_R1.fastq.gz
# Needed by ChIP and ATAC workflows
mkdir -p BAM_input/deepTools_qc/bamPEFragmentSize BAM_input/filtered_bam BAM_input/Sambamba BAM_input/bamCoverage
mkdir -p BAM_input/deepTools_qc/bamPEFragmentSize BAM_input/filtered_bam BAM_input/Sambamba BAM_input/bamCoverage
touch BAM_input/sample1.bam \
BAM_input/sample2.bam \
BAM_input/sample3.bam \
Expand Down Expand Up @@ -53,7 +53,7 @@ touch BAM_input/sample1.bam \
BAM_input/bamCoverage/sample3.filtered.seq_depth_norm.bw \
BAM_input/bamCoverage/sample4.filtered.seq_depth_norm.bw \
BAM_input/bamCoverage/sample5.filtered.seq_depth_norm.bw \
BAM_input/bamCoverage/sample6.filtered.seq_depth_norm.bw
BAM_input/bamCoverage/sample6.filtered.seq_depth_norm.bw

mkdir -p allelic_BAM_input/allelic_bams allelic_BAM_input/filtered_bam allelic_BAM_input/deepTools_qc/bamPEFragmentSize allelic_BAM_input/Sambamba allelic_BAM_input/bamCoverage/allele_specific
touch allelic_BAM_input/allelic_bams/sample1.genome1.sorted.bam \
Expand Down Expand Up @@ -104,7 +104,7 @@ touch allelic_BAM_input/allelic_bams/sample1.genome1.sorted.bam \
allelic_BAM_input/bamCoverage/allele_specific/sample3.genome1.seq_depth_norm.bw \
allelic_BAM_input/bamCoverage/allele_specific/sample4.genome1.seq_depth_norm.bw \
allelic_BAM_input/bamCoverage/allele_specific/sample5.genome1.seq_depth_norm.bw \
allelic_BAM_input/bamCoverage/allele_specific/sample6.genome1.seq_depth_norm.bw
allelic_BAM_input/bamCoverage/allele_specific/sample6.genome1.seq_depth_norm.bw
mkdir -p output
touch /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf
mkdir -p allelic_input
Expand Down Expand Up @@ -156,29 +156,29 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1297 ]; then exit 1 ; fi

# ChIP-seq
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 401 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 403 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 403 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --singleEnd .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 401 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --bigWigType log2ratio .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 363 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 365 ]; then exit 1 ; fi
# fromBAM
WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 715 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 717 ]; then exit 1 ; fi
# spikein
WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 433 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 435 ]; then exit 1 ; fi
# fromBAM and spikein
WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 591 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 593 ]; then exit 1 ; fi
WC=`ChIP-seq -d outdir --useSpikeInForNorm --getSizeFactorsFrom TSS --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 591 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 593 ]; then exit 1 ; fi
WC=`ChIP-seq -d outdir --useSpikeInForNorm --getSizeFactorsFrom input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 579 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 581 ]; then exit 1 ; fi
# allelic
WC=`ChIP-seq -d allelic_BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 331 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 333 ]; then exit 1 ; fi

# mRNA-seq
WC=`mRNA-seq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
Expand Down
21 changes: 17 additions & 4 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- template: .azure-pipelines/setup.yml
- bash: |
source activate foo
flake8 --ignore=E501,E722 --exclude docs/conf.py .
flake8 --ignore=E501,E722,E402 --exclude docs/conf.py .
displayName: flake8
- job: 'CI_tests'
Expand Down Expand Up @@ -70,10 +70,23 @@ jobs:
- bash: echo "##vso[task.prependpath]/usr/share/miniconda/bin"
displayName: Add conda to PATH
- template: .azure-pipelines/setup.yml
- task: UsePythonVersion@0
displayName: 'Use Python 3.7'
inputs:
versionSpec: 3.7
- bash: |
source activate foo
snakePipes createEnvs
displayName: "createEnvs"
python .azure-pipelines/testCreateEnv.py -d 1
displayName: "createEnvs1"
- bash: |
source activate foo
python .azure-pipelines/testCreateEnv.py -d 2
displayName: "createEnvs2"
- bash: |
source activate foo
python .azure-pipelines/testCreateEnv.py -d 3
displayName: "createEnvs3"
- job: 'envs_OSX'
pool:
Expand All @@ -95,7 +108,7 @@ jobs:
sudo chown 501:20 /Users/vsts/.conda/pkgs/urls.txt
fi
displayName: Fix OSX permissions
- template: .azure-pipelines/macosx_setup.yml
- template: .azure-pipelines/setup.yml
- bash: |
source activate foo
sudo snakePipes createEnvs
Expand Down
1 change: 1 addition & 0 deletions snakePipes/shared/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# Note that due to limitations in yaml.dump, only very basic structures are
# permitted here.
################################################################################
#
snakemakeOptions: ' --use-conda --conda-prefix /package/anaconda3/envs/ '
organismsDir: 'shared/organisms'
clusterConfig: 'shared/cluster.yaml'
Expand Down
3 changes: 2 additions & 1 deletion snakePipes/shared/rules/Bowtie2.snakefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ if pairedEnd:
log: "Bowtie2/logs/{sample}.sort.log"
params:
bowtie2_index=bowtie2_index,
alignerOpts = str(alignerOpts or ''),
alignerOpts = str(alignerOpts or ' ') if not cutntag else " --end-to-end --very-sensitive "\
"--no-mixed --no-discordant --phred33 -I 10 -X 700 ",
mateOrientation = mateOrientation,
insertSizeMax = insertSizeMax,
tempDir = tempDir
Expand Down
47 changes: 26 additions & 21 deletions snakePipes/shared/rules/ChIP_peak_calling.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,24 @@ if pairedEnd:
rule MACS2:
input:
chip = "filtered_bam/{chip_sample}.filtered.bam",
control =
lambda wildcards: "filtered_bam/"+get_control(wildcards.chip_sample)+".filtered.bam" if get_control(wildcards.chip_sample)
else [],
insert_size_metrics = "deepTools_qc/bamPEFragmentSize/fragmentSize.metric.tsv"
frag_size = "deepTools_qc/bamPEFragmentSize/fragmentSize.metric.tsv"
output:
peaks = "MACS2/{chip_sample}.filtered.BAM_peaks.xls",
peaksPE = "MACS2/{chip_sample}.filtered.BAMPE_peaks.xls"
params:
genome_size = genome_size,
broad_calling =
lambda wildcards: "--broad" if is_broad(wildcards.chip_sample) else "",
lambda wildcards: "--broad " if is_broad(wildcards.chip_sample) else "",
control_param =
lambda wildcards: "-c filtered_bam/"+get_control(wildcards.chip_sample)+".filtered.bam" if get_control(wildcards.chip_sample)
lambda wildcards: " -c filtered_bam/"+get_control(wildcards.chip_sample)+".filtered.bam" if get_control(wildcards.chip_sample)
else "",
qval_cutoff=qval,
mfold=mfold
genome_size = str(genome_size),
ext_size =
lambda wildcards: " --nomodel --extsize "+get_pe_frag_length(wildcards.chip_sample,
"deepTools_qc/bamPEFragmentSize/fragmentSize.metric.tsv") \
if not cutntag else " ",
peakCaller_options = lambda wildcards: str(peakCallerOptions or '') if not cutntag else " -p 1e-5 ",
bampe_options = lambda wildcards: str(BAMPEPeaks or '')if not cutntag else " ",
bam_options = lambda wildcards: str(BAMPeaks or '') if not cutntag else " "
log:
out = "MACS2/logs/MACS2.{chip_sample}.filtered.out",
err = "MACS2/logs/MACS2.{chip_sample}.filtered.err"
Expand All @@ -41,18 +43,20 @@ if pairedEnd:
shell: """
macs2 callpeak -t {input.chip} {params.control_param} \
-f BAM \
-g {params.genome_size} --qvalue {params.qval_cutoff}\
{params.bam_options} \
-g {params.genome_size} \
{params.ext_size} \
--keep-dup all \
--outdir MACS2 \
--name {wildcards.chip_sample}.filtered.BAM \
--nomodel \
--mfold {params.mfold}\
--extsize $(cat {input.insert_size_metrics} | grep filtered_bam/{wildcards.chip_sample}.filtered.bam | awk '{{printf("%i",$6)}}') \
{params.peakCaller_options} \
{params.broad_calling} > {log.out} 2> {log.err}
# also run MACS2 in paired-end mode BAMPE for comparison with single-end mode
macs2 callpeak -t {input.chip} \
{params.control_param} -f BAMPE --qvalue {params.qval_cutoff}\
{params.control_param} -f BAMPE \
{params.bampe_options} \
{params.peakCaller_options} \
-g {params.genome_size} --keep-dup all \
--outdir MACS2 --name {wildcards.chip_sample}.filtered.BAMPE \
{params.broad_calling} > {log.out}.BAMPE 2> {log.err}.BAMPE
Expand All @@ -67,26 +71,27 @@ else:
output:
peaks = "MACS2/{chip_sample}.filtered.BAM_peaks.xls",
params:
genome_size = int(genome_size),
genome_size = str(genome_size),
broad_calling =
lambda wildcards: "--broad" if is_broad(wildcards.chip_sample)
else "",
control_param =
lambda wildcards: "-c filtered_bam/"+get_control(wildcards.chip_sample)+".filtered.bam" if get_control(wildcards.chip_sample)
lambda wildcards: " -c filtered_bam/"+get_control(wildcards.chip_sample)+".filtered.bam" if get_control(wildcards.chip_sample)
else "",
frag_size=fragmentLength,
mfold=mfold,
qval_cutoff=qval
peakCaller_options = str(peakCallerOptions or ''),
bam_options = str(BAMPeaks or '')
log:
out = "MACS2/logs/MACS2.{chip_sample}.filtered.out",
err = "MACS2/logs/MACS2.{chip_sample}.filtered.err"
benchmark:
"MACS2/.benchmark/MACS2.{chip_sample}.filtered.benchmark"
conda: CONDA_CHIPSEQ_ENV
shell: """
macs2 callpeak -t {input.chip} {params.control_param} -f BAM -g {params.genome_size} --qvalue {params.qval_cutoff} --keep-dup all --outdir MACS2 \
--name {wildcards.chip_sample}.filtered.BAM --mfold {params.mfold} --extsize {params.frag_size}\
{params.broad_calling} > {log.out} 2> {log.err}
macs2 callpeak -t {input.chip} {params.control_param} -f BAM -g {params.genome_size} \
{params.peakCaller_options} --keep-dup all --outdir MACS2 \
--name {wildcards.chip_sample}.filtered.BAM {params.bam_options} --extsize {params.frag_size} \
{params.broad_calling} > {log.out} 2> {log.err}
"""


Expand Down
37 changes: 21 additions & 16 deletions snakePipes/shared/rules/ChIP_peak_calling_spikein.snakefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,24 @@ if pairedEnd:
rule MACS2:
input:
chip = "split_bam/{chip_sample}_host.bam",
control =
lambda wildcards: "split_bam/"+get_control(wildcards.chip_sample)+"_host.bam" if get_control(wildcards.chip_sample)
else [],
insert_size_metrics = "split_deepTools_qc/bamPEFragmentSize/host.fragmentSize.metric.tsv"
output:
peaks = "MACS2/{chip_sample}_host.BAM_peaks.xls",
peaksPE = "MACS2/{chip_sample}_host.BAMPE_peaks.xls"
params:
genome_size = genome_size,
genome_size = str(genome_size),
broad_calling =
lambda wildcards: "--broad" if is_broad(wildcards.chip_sample) else "",
control_param =
lambda wildcards: "-c split_bam/"+get_control(wildcards.chip_sample)+"_host.bam" if get_control(wildcards.chip_sample)
else "",
qval_cutoff=qval,
mfold=mfold
ext_size =
lambda wildcards: " --nomodel --extsize "+get_pe_frag_length(wildcards.chip_sample,
"split_deepTools_qc/bamPEFragmentSize/host.fragmentSize.metric.tsv") \
if not cutntag else " ",
peakCaller_options = lambda wildcards: str(peakCallerOptions or '') if not cutntag else " -p 1e-5 ",
bampe_options = lambda wildcards: str(BAMPEPeaks or '')if not cutntag else " ",
bam_options = lambda wildcards: str(BAMPeaks or '') if not cutntag else " "
log:
out = "MACS2/logs/MACS2.{chip_sample}_host.filtered.out",
err = "MACS2/logs/MACS2.{chip_sample}_host.filtered.err"
Expand All @@ -43,18 +45,20 @@ if pairedEnd:
shell: """
macs2 callpeak -t {input.chip} {params.control_param} \
-f BAM \
-g {params.genome_size} --qvalue {params.qval_cutoff}\
{params.bam_options} \
-g {params.genome_size} \
{params.ext_size} \
--keep-dup all \
--outdir MACS2 \
--name {wildcards.chip_sample}_host.BAM \
--nomodel \
--mfold {params.mfold}\
--extsize $(cat {input.insert_size_metrics} | grep split_bam/{wildcards.chip_sample}_host.bam | awk '{{printf("%i",$6)}}') \
{params.peakCaller_options} \
{params.broad_calling} > {log.out} 2> {log.err}
# also run MACS2 in paired-end mode BAMPE for comparison with single-end mode
macs2 callpeak -t {input.chip} \
{params.control_param} -f BAMPE --qvalue {params.qval_cutoff}\
{params.control_param} -f BAMPE \
{params.bampe_options} \
{params.peakCaller_options} \
-g {params.genome_size} --keep-dup all \
--outdir MACS2 --name {wildcards.chip_sample}_host.BAMPE \
{params.broad_calling} > {log.out}.BAMPE 2> {log.err}.BAMPE
Expand All @@ -77,18 +81,19 @@ else:
lambda wildcards: "-c split_bam/"+get_control(wildcards.chip_sample)+"_host.bam" if get_control(wildcards.chip_sample)
else "",
frag_size=fragmentLength,
mfold=mfold,
qval_cutoff=qval
peakCaller_options = str(peakCallerOptions or ''),
bam_options = str(BAMPeaks or '')
log:
out = "MACS2/logs/MACS2.{chip_sample}_host.filtered.out",
err = "MACS2/logs/MACS2.{chip_sample}_host.filtered.err"
benchmark:
"MACS2/.benchmark/MACS2.{chip_sample}_host.filtered.benchmark"
conda: CONDA_CHIPSEQ_ENV
shell: """
macs2 callpeak -t {input.chip} {params.control_param} -f BAM -g {params.genome_size} --qvalue {params.qval_cutoff} --keep-dup all --outdir MACS2 \
--name {wildcards.chip_sample}_host.BAM --mfold {params.mfold} --extsize {params.frag_size}\
{params.broad_calling} > {log.out} 2> {log.err}
macs2 callpeak -t {input.chip} {params.control_param} -f BAM -g {params.genome_size} \
{params.peakCaller_options} --keep-dup all --outdir MACS2 \
--name {wildcards.chip_sample}_host.BAM {params.bam_options} --extsize {params.frag_size} \
{params.broad_calling} > {log.out} 2> {log.err}
"""


Expand Down

0 comments on commit b28b23a

Please sign in to comment.