Skip to content

Commit

Permalink
Merge pull request #842 from maxplanck-ie/allelic_fixes
Browse files Browse the repository at this point in the history
Allelic fixes
  • Loading branch information
katsikora committed Sep 23, 2022
2 parents 2f04fe4 + d8da571 commit 51028e6
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 28 deletions.
1 change: 1 addition & 0 deletions docs/content/News.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ snakePipes 2.x.x
* Updated CSAW output.
* Fixed a couple of issues in the ATAC-seq workflow after sofware versions update.
* Updated organism yamls.
* Added apeglm2 logFC shrinkage to allelic DESeq2 results.


snakePipes 2.5.4
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def set_env_yamls():
'CONDA_RNASEQ_ENV': 'envs/rna_seq.yaml',
'CONDA_RMATS_ENV': 'envs/rMats.yaml',
'CONDA_scRNASEQ_ENV': 'envs/sc_rna_seq.yaml',
'CONDA_seurat3_ENV': 'envs/sc_rna_seq_seurat3.yaml',
'CONDA_seurat_ENV': 'envs/sc_rna_seq_seurat.yaml',
'CONDA_loompy_ENV': 'envs/sc_rna_seq_loompy.yaml',
'CONDA_alevinqc_ENV': 'envs/sc_rna_seq_alevinqc.yaml',
'CONDA_eisaR_ENV': 'envs/sc_rna_seq_eisaR.yaml',
Expand Down
20 changes: 14 additions & 6 deletions snakePipes/shared/rscripts/DESeq2.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,12 @@ cat(paste("Salmon tx2gene file : ", tx2gene_file, "\n"))
## ~~~~~ 1. SETUP ~~~~~
## sampleInfo (setup of the experiment)
sampleInfo <- read.table(sampleInfoFilePath, header = TRUE, stringsAsFactor = F)
#in case of allelic mode with just one group - don't relevel?
#in this case, comparison will be done between genome1 and genome2
if( length(unique(sampleInfo$condition))>1){
sampleInfo$condition <- as.factor(sampleInfo$condition)
sampleInfo$condition <- relevel(sampleInfo$condition, ref = as.character(sampleInfo$condition[1])) # first sample defines base
} else { message("Allelic workflow with one condition detected. Releveling skipped. Comparison will be done between genome1 and genome2.")}

## add X at the beginning of rows beginning with a number (makes it consistent to column names of of the count matrix!)
#if ( any(grepl("^[0-9]", sampleInfo$name)) ) {
Expand Down Expand Up @@ -87,12 +91,14 @@ if(isTRUE(tximport)) {
}

## ~~~~~~~ 3. run DESeq wrapper ~~~~~~~~
seqout <- DESeq_basic(countdata, coldata = sampleInfo, fdr = fdr, alleleSpecific = allelic_info, from_salmon = tximport)
#in case of the allelic-specific workflow, allow for 1 condition and skip deseq2 basic in this case
if(length(unique(sampleInfo$condition))>1){
seqout <- DESeq_basic(countdata, coldata = sampleInfo, fdr = fdr, alleleSpecific = allelic_info, from_salmon = tximport)

DESeq_writeOutput(DEseqout = seqout,
DESeq_writeOutput(DEseqout = seqout,
fdr = fdr, outprefix = "DEseq_basic",
geneNamesFile = geneNamesFilePath)

}
#DESeq_downstream(DEseqout = seqout, countdata, sampleInfo,
# fdr = fdr, outprefix = "DEseq_basic", heatmap_topN = topN,
# geneNamesFile = geneNamesFilePath)
Expand Down Expand Up @@ -125,9 +131,10 @@ bib <- c(
write.bibtex(bib, file = 'citations.bib')
file.copy(rmdTemplate, to = 'DESeq2_report_basic.Rmd')

outprefix = "DEseq_basic"
cite_options(citation_format = "text",style = "html",cite.style = "numeric",hyperlink = TRUE)
render('DESeq2_report_basic.Rmd',
if(length(unique(sampleInfo$condition))>1){
outprefix = "DEseq_basic"
cite_options(citation_format = "text",style = "html",cite.style = "numeric",hyperlink = TRUE)
render('DESeq2_report_basic.Rmd',
output_format = "html_document",
clean = TRUE,
params = list(
Expand All @@ -138,6 +145,7 @@ render('DESeq2_report_basic.Rmd',
fdr = 0.05,
heatmap_topN = 20,
geneNamesFile = geneNamesFilePath))
}

if (isTRUE(allelic_info)) {
file.copy(rmdTemplate, to = 'DESeq2_report_allelic.Rmd')
Expand Down
25 changes: 20 additions & 5 deletions snakePipes/shared/rscripts/DE_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ checktable <- function(countdata = NA, sampleSheet = NA, alleleSpecific = FALSE,
}
}
}
if(all(is.integer(countdata))){
print("All countdata is integer.")
}else{
print("Non-integer counts detected. The data will be rounded, as this is well within the expected sampling variation of a technical replicate.")
countdata<-round(countdata) }
return(countdata)
}

Expand Down Expand Up @@ -143,12 +148,22 @@ DESeq_allelic <- function(countdata, coldata, fdr) {
rownames(design)<-colnames(rnasamp)

# Run DESeq
dds <- DESeq2::DESeqDataSetFromMatrix(rnasamp, colData = design,
if(length(unique(design$condition))>1){
dds <- DESeq2::DESeqDataSetFromMatrix(rnasamp, colData = design,
design = ~allele + condition + allele:condition)
rownames(dds) <- rownames(rnasamp)
dds <- DESeq2::DESeq(dds,betaPrior = FALSE)
ddr <- DESeq2::results(dds, name=paste0("allelegenome2.condition",unique(coldata$condition)[2]))
output <- list(dds = dds, ddr = ddr, ddr_shrunk=NULL)
rownames(dds) <- rownames(rnasamp)
dds <- DESeq2::DESeq(dds,betaPrior = FALSE)
ddr <- DESeq2::results(dds, name=paste0("allelegenome2.condition",unique(coldata$condition)[2]))
ddr_shrunk <- DESeq2::lfcShrink(dds,coef=paste0("allelegenome2.condition",unique(coldata$condition)[2]),type="apeglm",res=ddr)
} else {
dds <- DESeq2::DESeqDataSetFromMatrix(rnasamp, colData = design,
design = ~allele)
rownames(dds) <- rownames(rnasamp)
dds <- DESeq2::DESeq(dds,betaPrior = FALSE)
ddr <- DESeq2::results(dds, name="allele_genome2_vs_genome1")
ddr_shrunk <- DESeq2::lfcShrink(dds,coef="allele_genome2_vs_genome1",type="apeglm",res=ddr)
}
output <- list(dds = dds, ddr = ddr, ddr_shrunk=ddr_shrunk)
return(output)
}

Expand Down
1 change: 1 addition & 0 deletions snakePipes/shared/rscripts/merge_featureCounts.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ get_df <- function(infile) {

if(isallelic(df) == TRUE) {
print("Counts are allele-specific")
bname = gsub(".allelic", "" ,bname)
df <- df[,c(1,7:9)]
colnames(df)[2:4] <- c(paste0(bname,"_all"), paste0(bname, "_genome", 1:2) )
} else {
Expand Down
4 changes: 2 additions & 2 deletions snakePipes/shared/rules/RNA_mapping_allelic.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ if aligner == "STAR":
else:
rule STAR_allele:
input:
r1 = fastq_dir+"/{sample}.fastq.gz",
r1 = fastq_dir+"/{sample}"+reads[0]+".fastq.gz",
index = star_index_allelic
output:
temp(aligner+"/{sample}.sorted.bam")
Expand All @@ -76,7 +76,7 @@ if aligner == "STAR":
--genomeDir {params.idx} \
--sjdbGTFfile {params.gtf} \
--sjdbOverhang 100 \
--readFilesIn <(gunzip -c {input}) \
--readFilesIn <(gunzip -c {input.r1}) \
--outFileNamePrefix {params.prefix} \
--outSAMunmapped Within \
--alignEndsType EndToEnd \
Expand Down
8 changes: 6 additions & 2 deletions snakePipes/shared/rules/SNPsplit.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ if aligner == "Bowtie2":
bam = "filtered_bam/{sample}.filtered.bam"
output:
targetbam = expand("allelic_bams/{{sample}}.filtered.{suffix}.bam", suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']),
tempbam = temp("filtered_bam/{sample}.filtered.sortedByName.bam")
tempbam = temp("filtered_bam/{sample}.filtered.sortedByName.bam"),
rep1 = "allelic_bams/{sample}.SNPsplit_report.yaml",
rep2 = "allelic_bams/{sample}.SNPsplit_sort.yaml"
log: "allelic_bams/logs/{sample}.snp_split.log"
params:
pairedEnd = '--paired' if pairedEnd else '',
Expand All @@ -24,7 +26,9 @@ elif aligner == "STAR":
bam = aligner+"/{sample}.bam"
output:
targetbam = expand("allelic_bams/{{sample}}.{suffix}.bam", suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']),
tempbam = temp(aligner+"/{sample}.sortedByName.bam")
tempbam = temp(aligner+"/{sample}.sortedByName.bam"),
rep1 = "allelic_bams/{sample}.SNPsplit_report.yaml",
rep2 = "allelic_bams/{sample}.SNPsplit_sort.yaml"
log: "allelic_bams/logs/{sample}.snp_split.log"
params:
pairedEnd = '--paired' if pairedEnd else '',
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/shared/rules/envs/sc_rna_seq_seurat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ channels:
- bioconda
dependencies:
- r-seurat = 4.1.1
- bioconductor-dropletutils
- bioconductor-dropletutils
7 changes: 0 additions & 7 deletions snakePipes/shared/rules/envs/sc_rna_seq_seurat3.yaml

This file was deleted.

7 changes: 7 additions & 0 deletions snakePipes/shared/rules/multiQC.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ def multiqc_input_check(return_value):
if qualimap:
infiles.append( expand("Qualimap_qc/{sample}.filtered.bamqc_results.txt", sample = samples) )
indir += " Qualimap_qc "
if "allelic-mapping" in mode:
infiles.append( expand("allelic_bams/{sample}.SNPsplit_report.yaml", sample = samples) )
infiles.append( expand("allelic_bams/{sample}.SNPsplit_sort.yaml", sample = samples) )
indir += "allelic_bams"
elif pipeline=="rna-seq":
# must be RNA-mapping, add files as per the mode
if "alignment" in mode or "deepTools_qc" in mode and not "allelic-mapping" in mode:
Expand All @@ -63,6 +67,9 @@ def multiqc_input_check(return_value):
if "allelic-mapping" in mode:
infiles.append( expand("featureCounts/{sample}.allelic_counts.txt", sample = samples) )
indir += aligner + " featureCounts "
infiles.append( expand("allelic_bams/{sample}.SNPsplit_report.yaml", sample = samples) )
infiles.append( expand("allelic_bams/{sample}.SNPsplit_sort.yaml", sample = samples) )
indir += "allelic_bams"
if "alignment-free" in mode:
infiles.append( expand("Salmon/{sample}/quant.sf", sample = samples) )
indir += " Salmon "
Expand Down
8 changes: 4 additions & 4 deletions snakePipes/shared/rules/scRNAseq_STARsolo.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ rule STARsolo_report:
samples = samples
log:
out = "STARsolo/logs/Report.out"
conda: CONDA_seurat3_ENV
conda: CONDA_seurat_ENV
script: "../rscripts/scRNAseq_report.R"


Expand Down Expand Up @@ -151,7 +151,7 @@ rule STARsolo_raw_to_seurat:
samples = samples
log:
out = "Seurat/STARsolo_raw/logs/seurat.out"
conda: CONDA_seurat3_ENV
conda: CONDA_seurat_ENV
script: "../rscripts/scRNAseq_Seurat3.R"

rule STARsolo_filtered_to_seurat:
Expand All @@ -165,7 +165,7 @@ rule STARsolo_filtered_to_seurat:
samples = samples
log:
out = "Seurat/STARsolo_filtered/logs/seurat.out"
conda: CONDA_seurat3_ENV
conda: CONDA_seurat_ENV
script: "../rscripts/scRNAseq_Seurat3.R"


Expand All @@ -182,7 +182,7 @@ rule remove_empty_drops:
samples = samples
log:
out = "Seurat/STARsolo_raw_RmEmptyDrops/logs/seurat.out"
conda: CONDA_seurat3_ENV
conda: CONDA_seurat_ENV
script: "../rscripts/scRNAseq_EmptyDrops.R"


Expand Down

0 comments on commit 51028e6

Please sign in to comment.