Skip to content

Commit

Permalink
Extentions ks (#1008)
Browse files Browse the repository at this point in the history
* fix formula for allelic mRNAseq multicomp

* ci tests for allelic multicomp

* ci tests chipseq multicomp no control

* fix allelic mapping multicomp

* minor version bump

* CSAW MACS2 fix


---------

Co-authored-by: katarzyna.otylia.sikora@gmail.com <sikora@maximus.ie-freiburg.mpg.de>
  • Loading branch information
katsikora and katarzyna.otylia.sikora@gmail.com committed Apr 19, 2024
1 parent 57c2144 commit 7d94a6f
Show file tree
Hide file tree
Showing 14 changed files with 163 additions and 30 deletions.
22 changes: 17 additions & 5 deletions .ci_stuff/test_dag.sh
Expand Up @@ -269,24 +269,28 @@ WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1418 ]; then exit 1 ; fi
#multiComp and spikein
WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1255 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1335 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1274 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1354 ]; then exit 1 ; fi
#multiComp and spikein and noInput
WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 853 ]; then exit 1 ; fi
WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 839 ]; then exit 1 ; fi
#multiComp and spikein and fromBam
WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1560 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1640 ]; then exit 1 ; fi
WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1045 ]; then exit 1 ; fi
#multiComp and spikein and fromBam and noInput
WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1059 ]; then exit 1 ; fi
WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1045 ]; then exit 1 ; fi
#multiComp and noInput and fromBam
WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1078 ]; then exit 1 ; fi


# mRNA-seq
WC=`mRNA-seq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
Expand Down Expand Up @@ -340,9 +344,17 @@ WC=`mRNA-seq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o outpu
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3294 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 659 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 659 ]; then exit 1 ; fi
#allelic+multicomp
WC=`mRNA-seq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 690 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1439 ]; then exit 1 ; fi
WC=`mRNA-seq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3302 ]; then exit 1 ; fi



#noncoding RNA-seq
WC=`noncoding-RNA-seq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi
WC=`noncoding-RNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l`
Expand Down
2 changes: 1 addition & 1 deletion conda-recipe/meta.yaml
@@ -1,6 +1,6 @@
package:
name: snakepipes
version: 2.8.2
version: 2.9.0

source:
path: ../
Expand Down
2 changes: 1 addition & 1 deletion docs/content/News.rst
@@ -1,7 +1,7 @@
snakePipes News
===============

snakePipes 2.8.2
snakePipes 2.9.0
________________

* added SEACR peaks qc
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/__init__.py
@@ -1 +1 @@
__version__ = '2.8.2'
__version__ = '2.9.0'
2 changes: 1 addition & 1 deletion snakePipes/shared/defaults.yaml
Expand Up @@ -7,7 +7,7 @@
# permitted here.
################################################################################
#
condaEnvDir: '/package/mamba/envs/snakepipesenvs/2.8.2'
condaEnvDir: '/package/mamba/envs/snakepipesenvs/2.9.0'
snakemakeOptions: ''
organismsDir: 'shared/organisms'
clusterConfig: 'shared/cluster.yaml'
Expand Down
3 changes: 2 additions & 1 deletion snakePipes/shared/rscripts/CSAW.R
Expand Up @@ -128,9 +128,10 @@ if (! external_bed) {
bed.gr = GRanges(seqnames = bed$V1, ranges = IRanges(start = bed$V2, end = bed$V3), name = bed$V4)
return(bed.gr)
})
}
# merge
allpeaks <- Reduce(function(x,y) GenomicRanges::union(x,y), allpeaks)
}

} else {
bed = read.delim(snakemake@input[['peaks']],header=FALSE)
bed.gr = GRanges(seqnames = bed$V1, ranges = IRanges(start = bed$V2, end = bed$V3), name = bed$V4)
Expand Down
4 changes: 2 additions & 2 deletions snakePipes/shared/rscripts/DESeq2.R
Expand Up @@ -34,7 +34,7 @@ importfunc <- snakemake@params[["importfunc"]]
allelic_info <- as.logical(snakemake@params[["allele_info"]])
tx2gene_file <- snakemake@params[["tx2gene_file"]]
rmdTemplate <- snakemake@params[["rmdTemplate"]]
formulaInput <- snakemake@params[["formula"]]
formulaInput <- as.character(snakemake@params[["formula"]])
wdir <- snakemake@params[["outdir"]]

setwd(wdir)
Expand Down Expand Up @@ -129,7 +129,7 @@ if(length(unique(sampleInfo$condition))>1){

## Run allele-sepecific DESeq wrapper (if asked for)
if (isTRUE(allelic_info)) {
seqout_allelic <- DESeq_allelic(countdata, coldata = sampleInfo, fdr = fdr, from_salmon=tximport)
seqout_allelic <- DESeq_allelic(countdata, coldata = sampleInfo, fdr = fdr, from_salmon=tximport, customFormula = NA)

DESeq_writeOutput(DEseqout = seqout_allelic,
fdr = fdr, outprefix = "DEseq_allelic",
Expand Down
4 changes: 2 additions & 2 deletions snakePipes/shared/rscripts/DE_functions.R
Expand Up @@ -95,7 +95,7 @@ checktable <- function(countdata = NA, sampleSheet = NA, alleleSpecific = FALSE,
DESeq_basic <- function(countdata, coldata, fdr, alleleSpecific = FALSE, from_salmon = FALSE, size_factors=NA, customFormula=NA) {
cnames.sub<-unique(colnames(coldata)[2:which(colnames(coldata) %in% "condition")])

if(is.na(customFormula)){
if(is.na(customFormula)|customFormula==""){
d<-as.formula(noquote(paste0("~",paste(cnames.sub,collapse="+"))))
} else {

Expand Down Expand Up @@ -150,7 +150,7 @@ DESeq_basic <- function(countdata, coldata, fdr, alleleSpecific = FALSE, from_sa
#'
#'

DESeq_allelic <- function(countdata, coldata, fdr, from_salmon=FALSE) {
DESeq_allelic <- function(countdata, coldata, fdr, from_salmon=FALSE, customFormula=NA) {

# AlleleSpecific DEseq
print("Performing Allele-specific DESeq using Interaction design : Genome2 vs Genome1")
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/shared/rules/CSAW.multiComp.snakefile
Expand Up @@ -61,7 +61,7 @@ def getHeatmapInput():
if pipeline in 'ATAC-seq':
return(expand("CSAW_{}_{}".format(peakCaller, sample_name + ".{{compGroup}}") + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN']))
elif pipeline in 'chip-seq':
if not useSpikeInForNorm:
if chip_samples_w_ctrl:
return(expand("CSAW_{}_{}".format(peakCaller, sample_name + ".{{compGroup}}") + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN']) + expand("CSAW_{}_{}".format(peakCaller, sample_name + ".{{compGroup}}") + "/CSAW.{change_dir}.log2r.heatmap.png", change_dir=['UP', 'DOWN']))
else:
return(expand("CSAW_{}_{}".format(peakCaller, sample_name + ".{{compGroup}}") + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN']))
Expand Down
4 changes: 2 additions & 2 deletions snakePipes/shared/rules/DESeq2.multipleComp.snakefile
Expand Up @@ -19,7 +19,7 @@ checkpoint split_sampleSheet:
## DESeq2 (on featureCounts)
rule DESeq2:
input:
counts_table = lambda wildcards : "featureCounts/counts_allelic.tsv" if 'allelic-mapping' in mode else "featureCounts/counts.tsv",
counts_table = lambda wildcards : "featureCounts/counts_allelic.tsv" if 'allelic-mapping' in mode or "allelic-counting" in mode else "featureCounts/counts.tsv",
sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output,
symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file
output:
Expand All @@ -32,7 +32,7 @@ rule DESeq2:
sampleSheet = lambda wildcards,input: os.path.join(outdir,str(input.sampleSheet)),
fdr = fdr,
importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
allele_info = lambda wildcards : 'TRUE' if 'allelic-mapping' in mode else 'FALSE',
allele_info = lambda wildcards : 'TRUE' if 'allelic-mapping' in mode or "allelic-counting" in mode else 'FALSE',
tx2gene_file = 'NA',
rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd"),
formula = config["formula"],
Expand Down
17 changes: 10 additions & 7 deletions snakePipes/workflows/mRNA-seq/Snakefile
Expand Up @@ -127,13 +127,13 @@ include:os.path.join(maindir, "shared", "rules", "RNA-seq_qc_report.snakefile")

## DESeq2
if sampleSheet and not "three-prime-seq" in mode:
if isMultipleComparison and not "allelic-mapping" in mode and not "allelic-counting" in mode:
if isMultipleComparison :
include: os.path.join(maindir, "shared", "rules", "DESeq2.multipleComp.snakefile")
if rMats:
if rMats and not "allelic-mapping" in mode and not "allelic-counting" in mode:
include: os.path.join(maindir, "shared", "rules", "rMats.multipleComp.snakefile")
else:
include: os.path.join(maindir, "shared", "rules", "DESeq2.singleComp.snakefile")
if rMats:
if rMats and not "allelic-mapping" in mode and not "allelic-counting" in mode:
include: os.path.join(maindir, "shared", "rules", "rMats.singleComp.snakefile")

## MultiQC
Expand Down Expand Up @@ -257,10 +257,13 @@ def run_allelesp_mapping():
]
if sampleSheet:
sample_name = os.path.splitext(os.path.basename(sampleSheet))[0]
file_list.append( ["DESeq2_{}/DESeq2.session_info.txt".format(sample_name)] )
if sampleSheet and rMats:
file_list.append( ["rMats_{}/RI.MATS.JCEC.txt".format(sample_name)] )
file_list.append( ["rMats_{}/b2.csv".format(sample_name)] )
if not isMultipleComparison:
file_list.append( ["DESeq2_{}/DESeq2.session_info.txt".format(sample_name)] )
#if rMats:
#file_list.append( ["rMats_{}/RI.MATS.JCEC.txt".format(sample_name)] )
#file_list.append( ["rMats_{}/b2.csv".format(sample_name)] )
else:
file_list.append(expand("DESeq2_{}".format(sample_name) + ".{compGroup}/DESeq2.session_info.txt",compGroup=cf.returnComparisonGroups(sampleSheet)))
return(file_list)
else:
return([])
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/workflows/mRNA-seq/defaults.yaml
Expand Up @@ -64,7 +64,7 @@ clusterPAS:
## further options
mode: alignment,deepTools_qc
sampleSheet:
formula:
formula: ""
rMats: False
bwBinSize: 25
fastqc: False
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/workflows/mRNA-seq/mRNA-seq
Expand Up @@ -27,7 +27,7 @@ def parse_args(defaults={"verbose": False, "configFile": None,
"alignerOptions": None,
"featureCountsOptions": "-C -Q 10 --primary",
"filterGTF": None, "sampleSheet": None,
"formula": None,
"formula": "",
"reads": ["_R1", "_R2"], "ext": ".fastq.gz",
"bwBinSize": 25, "dnaContam": False, "plotFormat": "png",
"fromBAM": False, "bamExt": ".bam", "pairedEnd": True,
Expand Down

0 comments on commit 7d94a6f

Please sign in to comment.