Skip to content

Commit

Permalink
Merge pull request #682 from maxplanck-ie/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
katsikora committed Sep 1, 2020
2 parents 4b961d8 + 0473d65 commit 9c59d06
Show file tree
Hide file tree
Showing 11 changed files with 86 additions and 25 deletions.
64 changes: 55 additions & 9 deletions .ci_stuff/test_dag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,50 @@ touch BAM_input/sample1.bam \
BAM_input/Sambamba/sample5.markdup.txt \
BAM_input/Sambamba/sample6.markdup.txt \
BAM_input/deepTools_qc/bamPEFragmentSize/fragmentSize.metric.tsv
mkdir -p allelic_BAM_input/allelic_bams allelic_BAM_input/filtered_bam allelic_BAM_input/deepTools_qc/bamPEFragmentSize allelic_BAM_input/Sambamba
touch allelic_BAM_input/allelic_bams/sample1.genome1.sorted.bam \
allelic_BAM_input/allelic_bams/sample1.genome2.sorted.bam \
allelic_BAM_input/allelic_bams/sample2.genome1.sorted.bam \
allelic_BAM_input/allelic_bams/sample2.genome2.sorted.bam \
allelic_BAM_input/allelic_bams/sample3.genome1.sorted.bam \
allelic_BAM_input/allelic_bams/sample3.genome2.sorted.bam \
allelic_BAM_input/allelic_bams/sample4.genome1.sorted.bam \
allelic_BAM_input/allelic_bams/sample4.genome2.sorted.bam \
allelic_BAM_input/allelic_bams/sample5.genome1.sorted.bam \
allelic_BAM_input/allelic_bams/sample5.genome2.sorted.bam \
allelic_BAM_input/allelic_bams/sample6.genome1.sorted.bam \
allelic_BAM_input/allelic_bams/sample6.genome2.sorted.bam \
allelic_BAM_input/allelic_bams/sample1.genome1.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample1.genome2.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample2.genome1.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample2.genome2.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample3.genome1.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample3.genome2.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample4.genome1.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample4.genome2.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample5.genome1.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample5.genome2.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample6.genome1.sorted.bam.bai \
allelic_BAM_input/allelic_bams/sample6.genome2.sorted.bam.bai \
allelic_BAM_input/deepTools_qc/bamPEFragmentSize/fragmentSize.metric.tsv \
allelic_BAM_input/filtered_bam/sample1.filtered.bam \
allelic_BAM_input/filtered_bam/sample2.filtered.bam \
allelic_BAM_input/filtered_bam/sample3.filtered.bam \
allelic_BAM_input/filtered_bam/sample4.filtered.bam \
allelic_BAM_input/filtered_bam/sample5.filtered.bam \
allelic_BAM_input/filtered_bam/sample6.filtered.bam \
allelic_BAM_input/filtered_bam/sample1.filtered.bam.bai \
allelic_BAM_input/filtered_bam/sample2.filtered.bam.bai \
allelic_BAM_input/filtered_bam/sample3.filtered.bam.bai \
allelic_BAM_input/filtered_bam/sample4.filtered.bam.bai \
allelic_BAM_input/filtered_bam/sample5.filtered.bam.bai \
allelic_BAM_input/filtered_bam/sample6.filtered.bam.bai \
allelic_BAM_input/Sambamba/sample1.markdup.txt \
allelic_BAM_input/Sambamba/sample2.markdup.txt \
allelic_BAM_input/Sambamba/sample3.markdup.txt \
allelic_BAM_input/Sambamba/sample4.markdup.txt \
allelic_BAM_input/Sambamba/sample5.markdup.txt \
allelic_BAM_input/Sambamba/sample6.markdup.txt
mkdir -p output
touch /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf
mkdir -p allelic_input
Expand Down Expand Up @@ -111,12 +155,14 @@ WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeO
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 715 ]; then exit 1 ; fi
# fromBAM and spikein
WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 714 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 591 ]; then exit 1 ; fi
WC=`ChIP-seq -d outdir --useSpikeInForNorm --getSizeFactorsFrom TSS --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 591 ]; then exit 1 ; fi
WC=`ChIP-seq -d outdir --useSpikeInForNorm --getSizeFactorsFrom input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 579 ]; then exit 1 ; fi

# allelic
WC=`ChIP-seq -d allelic_BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 331 ]; then exit 1 ; fi

# mRNA-seq
WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
Expand Down Expand Up @@ -146,12 +192,12 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 909 ]; then exit 1 ; fi
WC=`mRNA-seq -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 593 ]; then exit 1 ; fi
#allelic
WC=`mRNA-seq -m allelic-mapping -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1053 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-mapping -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1036 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-mapping -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1053 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1450 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1433 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1450 ]; then exit 1 ; fi

# noncoding-RNA-seq
WC=`noncoding-RNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
Expand Down Expand Up @@ -214,4 +260,4 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 386 ]; then exit 1 ; fi
WC=`preprocessing -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --fastqc --optDedupDist 2500 | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 386 ]; then exit 1 ; fi

rm -rf SE_input PE_input BAM_input output allelic_input /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf
rm -rf SE_input PE_input BAM_input output allelic_input allelic_BAM_input /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf
2 changes: 1 addition & 1 deletion conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: snakepipes
version: 2.2.0
version: 2.2.1

source:
path: ../
Expand Down
8 changes: 8 additions & 0 deletions docs/content/News.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
snakePipes News
===============

snakePipes 2.2.1
----------------

* Fix a bug in DAG for ChIPseq allelic with CSAW.
* Fixed deepTools qc DAG for ChIPseq with spikein.
* Added DAG test for allelic ChIPseq.
* Fixed a bug with deepTools QC for allelic mRNAseq.

snakePipes 2.2.0
----------------
* Added Alevin mode in scRNA workflow
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.2.0'
__version__ = '2.2.1'
2 changes: 1 addition & 1 deletion snakePipes/shared/rscripts/CSAW.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ if(useSpikeInForNorm){
}

## get DB regions
print("Performing differential binding")
message("Performing differential binding")
chip_results <- getDBregions_chip(chip_object, plotfile = "DiffBinding_modelfit.pdf")

## write output
Expand Down
8 changes: 5 additions & 3 deletions snakePipes/shared/rscripts/DB_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ readfiles_chip <- function(sampleSheet, fragmentLength, window_size, alleleSpeci
# for 1 samples, use normal design
message("1 sample used : comparing genome2 to genome1")
designm <- model.matrix(~ allele, data = design)
rownames(designm)<-sampleSheet$name
rownames(designm)<-paste0(design$name,".",design$allele)
designType <- "allele"
} else {
# for 1 sample, use interaction design
message(">1 samples used : comparing genome2 to genome1 blocking for different conditions")
designm <- model.matrix(~ allele + condition,data = design)
rownames(designm)<-sampleSheet$name
rownames(designm)<-paste0(design$name,".",design$allele)
designType <- "blocking"
}

Expand Down Expand Up @@ -255,7 +255,9 @@ getDBregions_chip <- function(chipCountObject, plotfile = NULL){

# Make DGElist
y <- csaw::asDGEList(chipCountObject$windowCounts, norm.factors = chipCountObject$normFactors)
colnames(y)<-sampleInfo$name
if(chipCountObject$designType=="condition"){
colnames(y)<-chipCountObject$sampleInfo$name}else{
colnames(y)<-paste0(rep(chipCountObject$sampleSheet$name,each=2),".genome",c(1,2))}
design <- chipCountObject$design
# Estimate dispersions
y <- edgeR::estimateDisp(y, design)
Expand Down
7 changes: 4 additions & 3 deletions snakePipes/shared/rules/CSAW.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,11 @@ def getBamCoverage():
return []

def getHeatmapInput():
if pipeline in 'ATAC-seq' or pipeline in 'chip-seq' and not getSizeFactorsFrom == "genome":
if pipeline in 'ATAC-seq' or pipeline in 'chip-seq':
return(expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN']))
else:
return(expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN']) + expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.log2r.heatmap.png", change_dir=['UP', 'DOWN']))
if not useSpikeInForNorm:
return(expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN']) + expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.log2r.heatmap.png", change_dir=['UP', 'DOWN']))


## CSAW for differential binding / allele-specific binding analysis
Expand Down Expand Up @@ -87,7 +88,7 @@ rule CSAW:


if allele_info == 'FALSE':
if pipeline in 'chip-seq' and getSizeFactorsFrom == "genome":
if pipeline in 'chip-seq' and not useSpikeInForNorm:
rule calc_matrix_log2r_CSAW:
input:
csaw_in = "CSAW_{}_{}/CSAW.session_info.txt".format(peakCaller, sample_name),
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rules/deepTools_ChIP_allelic.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ rule bamCompare_log2_genome1:
else "--extendReads " + str(fragmentLength),
blacklist = "--blackListFileName " + blacklist_bed if blacklist_bed
else "",
scaleFactors = " --scaleFactorsMethod readCount "
log:
out = "deepTools_ChIP/logs/bamCompare.log2ratio.{chip_sample}.{control_name}.genome1.out",
err = "deepTools_ChIP/logs/bamCompare.log2ratio.{chip_sample}.{control_name}.genome1.err"
Expand All @@ -41,6 +42,7 @@ rule bamCompare_log2_genome2:
else "--extendReads " + str(fragmentLength),
blacklist = "--blackListFileName " + blacklist_bed if blacklist_bed
else "",
scaleFactors = " --scaleFactorsMethod readCount "
log:
out = "deepTools_ChIP/logs/bamCompare.log2ratio.{chip_sample}.{control_name}.genome2.out",
err = "deepTools_ChIP/logs/bamCompare.log2ratio.{chip_sample}.{control_name}.genome2.err"
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/shared/rules/multiQC.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def multiqc_input_check(return_value):
indir += " Qualimap_qc "
elif pipeline=="rna-seq":
# must be RNA-mapping, add files as per the mode
if "alignment" in mode or "deepTools_qc" in mode:
if "alignment" in mode or "deepTools_qc" in mode and not "allelic-mapping" in mode:
infiles.append( expand(aligner+"/{sample}.bam", sample = samples) +
expand("Sambamba/{sample}.markdup.txt", sample = samples) +
expand("deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt",sample=samples)+
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/shared/tools/deeptools_cmds.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ plotEnrich_chip_cmd = """
plotFingerprint_cmd = """
plotFingerprint \
-b {input.bams} \
{params.labels} \
--labels {params.labels} \
--plotTitle 'Cumulative read counts per bin without duplicates' \
--ignoreDuplicates \
--outQualityMetrics {output.metrics} \
Expand Down
12 changes: 7 additions & 5 deletions snakePipes/workflows/ChIP-seq/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,13 @@ def run_deepTools_allelic():

def run_CSAW():
if sampleSheet:
file_list=["CSAW_{}_{}/CSAW.session_info.txt".format(peakCaller, sample_name),"Annotation/genes.filtered.symbol","Annotation/genes.filtered.t2g","CSAW_{}_{}/CSAW.Stats_report.html".format(peakCaller, sample_name)]
file_list.append([os.path.join("CSAW_{}_{}".format(peakCaller, sample_name), x) for x in list(itertools.chain.from_iterable([expand("CSAW.{change_dir}.cov.matrix",change_dir=change_direction),expand("CSAW.{change_dir}.cov.heatmap.png",change_dir=change_direction)]))])
file_list.append([os.path.join("AnnotatedResults_{}_{}".format(peakCaller,sample_name), x) for x in list(itertools.chain.from_iterable([expand("Filtered.results.{change_dir}_withNearestGene.txt",change_dir=change_direction)]))])
if getSizeFactorsFrom == "genome":
file_list.append([os.path.join("CSAW_{}_{}".format(peakCaller, sample_name), x) for x in list(itertools.chain.from_iterable([expand("CSAW.{change_dir}.log2r.matrix",change_dir=change_direction),expand("CSAW.{change_dir}.log2r.heatmap.png",change_dir=change_direction)]))])
file_list=["CSAW_{}_{}/CSAW.session_info.txt".format(peakCaller, sample_name)]
if allele_info == 'FALSE':
file_list.append(["Annotation/genes.filtered.symbol","Annotation/genes.filtered.t2g","CSAW_{}_{}/CSAW.Stats_report.html".format(peakCaller, sample_name)])
file_list.append([os.path.join("CSAW_{}_{}".format(peakCaller, sample_name), x) for x in list(itertools.chain.from_iterable([expand("CSAW.{change_dir}.cov.matrix",change_dir=change_direction),expand("CSAW.{change_dir}.cov.heatmap.png",change_dir=change_direction)]))])
file_list.append([os.path.join("AnnotatedResults_{}_{}".format(peakCaller,sample_name), x) for x in list(itertools.chain.from_iterable([expand("Filtered.results.{change_dir}_withNearestGene.txt",change_dir=change_direction)]))])
if not useSpikeInForNorm:
file_list.append([os.path.join("CSAW_{}_{}".format(peakCaller, sample_name), x) for x in list(itertools.chain.from_iterable([expand("CSAW.{change_dir}.log2r.matrix",change_dir=change_direction),expand("CSAW.{change_dir}.log2r.heatmap.png",change_dir=change_direction)]))])
return( file_list )
else:
return([])
Expand Down

0 comments on commit 9c59d06

Please sign in to comment.