Skip to content

Commit

Permalink
Merge pull request #762 from maxplanck-ie/cut_n_run
Browse files Browse the repository at this point in the history
mapping and coverage files for cut and tag data
  • Loading branch information
LeilyR committed May 12, 2021
2 parents d12515d + 00e31d4 commit 53fbd9b
Show file tree
Hide file tree
Showing 11 changed files with 44 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines/setup.yml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ steps:
# installOptions: -c conda-forge -c bioconda --quiet
# createOptions: -c conda-forge -c bioconda --quiet --yes
- bash: |
conda create -n foo -q --yes --quiet -c conda-forge -c bioconda snakemake fuzzywuzzy mock sphinx sphinx-argparse sphinx_rtd_theme flake8 coreutils python=3.7
conda create -n foo -q --yes --quiet -c conda-forge -c bioconda snakemake=5.18.0 fuzzywuzzy mock sphinx sphinx-argparse sphinx_rtd_theme flake8 coreutils python=3.7
displayName: Installing dependencies
- bash: |
source activate foo
Expand Down
1 change: 0 additions & 1 deletion .ci_stuff/genome.fa.fai
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
1 10
2_spikein 10

Empty file modified snakePipes/shared/rscripts/DESeq2.R
100644 → 100755
Empty file.
6 changes: 3 additions & 3 deletions snakePipes/shared/rules/Bowtie2.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ if pairedEnd:
log: "Bowtie2/logs/{sample}.sort.log"
params:
bowtie2_index=bowtie2_index,
alignerOpts = str(alignerOpts or ' ') if not cutntag else " --end-to-end --very-sensitive "\
"--no-mixed --no-discordant --phred33 -I 10 -X 700 ",
alignerOpts = str(alignerOpts or ' ') if not cutntag else " --local --very-sensitive-local "\
"--no-mixed --no-discordant --phred33 -I 10 ",
mateOrientation = mateOrientation,
insertSizeMax = insertSizeMax,
insertSizeMax = str(insertSizeMax or ' ') if not cutntag else " 700 ",
tempDir = tempDir
benchmark:
"Bowtie2/.benchmark/Bowtie2.{sample}.benchmark"
Expand Down
3 changes: 2 additions & 1 deletion snakePipes/shared/rules/deepTools_qc.snakefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ rule multiBamSummary:
read_extension = "--extendReads" if pairedEnd
else "--extendReads {}".format(fragmentLength),
scaling_factors = "--scalingFactors deepTools_qc/multiBamSummary/scaling_factors.txt",
binSize = ""
binSize = "",
spikein_region = ""
log:
out = "deepTools_qc/logs/multiBamSummary.out",
err = "deepTools_qc/logs/multiBamSummary.err"
Expand Down
3 changes: 2 additions & 1 deletion snakePipes/shared/rules/deepTools_qc_allelic.snakefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ rule multiBamSummary_allelic:
read_extension = "--extendReads" if pairedEnd
else "--extendReads " + str(fragmentLength),
scaling_factors = "",
binSize = ""
binSize = "",
spikein_region = ""
log:
out = "deepTools_qc/logs/multiBamSummary_allelic.out",
err = "deepTools_qc/logs/multiBamSummary_allelic.err"
Expand Down
20 changes: 11 additions & 9 deletions snakePipes/shared/rules/split_bam_ops_ChIP_spikein.snakefile
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
part=['host','spikein']
blacklist_dict={"host": blacklist_bed,"spikein": spikein_blacklist_bed }
region_dict={"host": " ".join(host_chr),"spikein": " ".join(spikein_chr)}
region_dict={"host": " ".join(host_chr.keys()),"spikein": " ".join(spikein_chr.keys())}


def get_scaling_factor(sample,input):
Expand All @@ -20,7 +20,7 @@ def get_scaling_factor(sample,input):
return float(1)

rule split_bamfiles_by_genome:
input:
input:
bam = "filtered_bam/{sample}.filtered.bam",
bai = "filtered_bam/{sample}.filtered.bam.bai"
output:
Expand Down Expand Up @@ -49,7 +49,8 @@ rule multiBamSummary_input:
read_extension = "--extendReads" if pairedEnd
else "--extendReads {}".format(fragmentLength),
scaling_factors = "--scalingFactors split_deepTools_qc/multiBamSummary/{part}.input.scaling_factors.txt",
binSize = lambda wildcards: " --binSize 100000 " if wildcards.part=="spikein" else ""
binSize = lambda wildcards: " --binSize 100000 " if wildcards.part=="spikein" else "",
spikein_region = ""
log:
out = "split_deepTools_qc/logs/{part}.input_multiBamSummary.out",
err = "split_deepTools_qc/logs/{part}.input_multiBamSummary.err"
Expand All @@ -73,7 +74,9 @@ rule multiBamSummary_ChIP:
read_extension = "--extendReads" if pairedEnd
else "--extendReads {}".format(fragmentLength),
scaling_factors = "--scalingFactors split_deepTools_qc/multiBamSummary/{part}.ChIP.scaling_factors.txt",
binSize = lambda wildcards: " --binSize 100000 " if wildcards.part=="spikein" else ""
binSize = lambda wildcards: " --binSize "+str(spikein_bin_size) if wildcards.part=="spikein" else "",
spikein_region = lambda wildcards: " --region "+spikein_region if ((wildcards.part=="spikein") and (spikein_region != "")) else ""

log:
out = "split_deepTools_qc/logs/{part}.ChIP_multiBamSummary.out",
err = "split_deepTools_qc/logs/{part}.ChIP_multiBamSummary.err"
Expand All @@ -98,7 +101,7 @@ rule multiBamSummary_TSS:
read_extension = "--extendReads" if pairedEnd
else "--extendReads {}".format(fragmentLength),
scaling_factors = "--scalingFactors split_deepTools_qc/multiBamSummary_BED/spikein.ChIP.scaling_factors.txt",
binSize = " --binSize 100000 "
binSize = " --binSize 100000 "
log:
out = "split_deepTools_qc/logs/spikein.ChIP_multiBamSummary.BED.out",
err = "split_deepTools_qc/logs/spikein.ChIP_multiBamSummary.BED.err"
Expand All @@ -124,7 +127,7 @@ rule bamCoverage_by_part:
input:
bam = "split_bam/{sample}_host.bam" ,
bai = "split_bam/{sample}_host.bam.bai",
scale_factors = "split_deepTools_qc/multiBamSummary/{part}.ChIP.scaling_factors.txt"
scale_factors = "split_deepTools_qc/multiBamSummary/{part}.ChIP.scaling_factors.txt"
output:
"bamCoverage/{sample}.host.seq_depth_norm.BY{part}.bw"
params:
Expand All @@ -150,7 +153,7 @@ rule bamCoverage_by_TSS:
input:
bam = "split_bam/{sample}_host.bam" ,
bai = "split_bam/{sample}_host.bam.bai",
scale_factors = "split_deepTools_qc/multiBamSummary_BED/spikein.ChIP.scaling_factors.txt"
scale_factors = "split_deepTools_qc/multiBamSummary_BED/spikein.ChIP.scaling_factors.txt"
output:
"bamCoverage_TSS/{sample}.host.seq_depth_norm.BYspikein.bw"
params:
Expand All @@ -176,7 +179,7 @@ rule bamCoverage_by_input:
input:
bam = "split_bam/{sample}_host.bam" ,
bai = "split_bam/{sample}_host.bam.bai",
scale_factors = "split_deepTools_qc/multiBamSummary/spikein.input.scaling_factors.txt"
scale_factors = "split_deepTools_qc/multiBamSummary/spikein.input.scaling_factors.txt"
output:
"bamCoverage_input/{sample}.host.seq_depth_norm.BYspikein.bw"
params:
Expand Down Expand Up @@ -213,4 +216,3 @@ rule bamPE_fragment_size:
threads: 24
conda: CONDA_SHARED_ENV
shell: bamPEFragmentSize_cmd

5 changes: 3 additions & 2 deletions snakePipes/shared/tools/deeptools_cmds.snakefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ bamcov_RPKM_cmd = """
{params.blacklist} > {log.out} 2> {log.err}
"""

# bamCoverage RNA-seq unique mappings
# bamCoverage RNA-seq unique mappings
bamcov_unique_cmd = """
bamCoverage -b {input.bam} \
-o {output.bw_fwd} --binSize {params.bwBinSize} \
Expand All @@ -57,7 +57,7 @@ bamcov_unique_cmd = """
--minMappingQuality 10 --samFlagExclude 2304 --filterRNAstrand reverse \
-p {threads} >> {log.out} 2>> {log.err}
"""


# bamCoverage CHIP
bamcov_cmd = """
Expand Down Expand Up @@ -151,6 +151,7 @@ multiBamSummary_cmd = """
{params.blacklist} \
{params.scaling_factors} \
{params.binSize} \
{params.spikein_region} \
-p {threads} \
{params.read_extension} > {log.out} 2> {log.err}
"""
Expand Down
1 change: 1 addition & 0 deletions snakePipes/workflows/ChIP-seq/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ genome:
useSpikeInForNorm: false
getSizeFactorsFrom: genome
spikeinExt: _spikein
spikein_bin_size: 1000
## Which peak caller to use?
peakCaller: 'MACS2'
peakCallerOptions: --qvalue 0.001
Expand Down
28 changes: 18 additions & 10 deletions snakePipes/workflows/ChIP-seq/internals.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -225,24 +225,32 @@ def check_if_spikein_genome(genome_index,spikeinExt):
print("\n Error! Genome index file "+ genome_index +" not found!!!\n\n")
exit(1)

def get_host_and_spikein_chromosomes(genome_index, spikeinExt):
    """Split the chromosomes of a genome index into host and spike-in sets.

    Parameters:
        genome_index: path to a tab-separated index file whose first two
            columns are the chromosome name and its length.
        spikeinExt: pattern handed to ``re.search``; every chromosome name
            matching it is classified as spike-in, all others as host.

    Returns:
        A two-element list ``[host, spikein]``. Each element is a dict
        mapping chromosome name to its length, kept as the
        whitespace-stripped string read from the file.

    Lines with fewer than two tab-separated fields (e.g. empty lines) are
    skipped with a warning instead of aborting the parse.
    """
    # Imported locally so the function does not rely on a file-level import.
    import warnings

    hostl = dict()
    spikeinl = dict()
    with open(genome_index) as ifile:
        for line in ifile:
            # Split once; drop the line terminator first so that a
            # two-column index does not leave "\n" glued to the length.
            fields = line.rstrip("\r\n").split('\t')
            if len(fields) < 2:
                warnings.warn("check for empty lines in the index file!")
                continue
            entry = fields[0]
            length = fields[1].strip()
            if re.search(spikeinExt, entry):
                spikeinl[entry] = length
            else:
                hostl[entry] = length
    return [hostl, spikeinl]

if useSpikeInForNorm:
    part = ['host', 'spikein']
    spikein_detected = check_if_spikein_genome(genome_index, spikeinExt)
    if spikein_detected:
        # Parse the index once and unpack both name -> length dicts.
        host_chr, spikein_chr = get_host_and_spikein_chromosomes(genome_index, spikeinExt)
        # Stays empty unless exactly one spike-in chromosome is present.
        spikein_region = ""
        if len(spikein_chr) == 1:
            k, v = next(iter(spikein_chr.items()))
            # Build "<name>:0:<length>"; strip the length so that no stray
            # newline from the index file ends up in the region string.
            spikein_region = ":0:".join([str(k), str(v).strip()])
    else:
        print("\n No spikein genome detected - no spikeIn chromosomes found with extention " + spikeinExt + " .\n\n")
        exit(1)
5 changes: 3 additions & 2 deletions snakePipes/workflows/DNA-mapping/DNA-mapping
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ def parse_args(defaults={"verbose": False, "configFile": None,

optional.add_argument("--cutntag",
help="if set, Bowti2 is used for mapping with parameters as has been used "
"in the method section of Kaya-okur et al. 2019. "
"Setting this flag overwrites the '--alignerOpts'."
"in the method section of Kaya-okur et al. 2019. ('--local --very-sensitive-local "
"--no-mixed --no-discordant --phred33 -I 10 -X 700')"
"Setting this flag overwrites the '--alignerOpts' and '--insertSizeMax'."
" Default is '%(default)s'.",
action="store_true")

Expand Down

0 comments on commit 53fbd9b

Please sign in to comment.