Skip to content

Commit

Permalink
Merge pull request #366 from maxplanck-ie/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
dpryan79 committed Dec 7, 2018
2 parents 584695c + c84c72d commit 898986b
Show file tree
Hide file tree
Showing 33 changed files with 103 additions and 33 deletions.
2 changes: 1 addition & 1 deletion conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: snakepipes
version: 1.1.1
version: 1.1.2

source:
path: ../
Expand Down
3 changes: 2 additions & 1 deletion docs/content/News.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ snakePipes 1.1.2

* A number of minor bug fixes and enhancements in the HiC and WGBS pipelines
* The RNA-seq pipeline now uses samtools for sorting. This should avoid issues with STAR running out of memory during the output sorting step.
* Increased the memory allocation for MACS2 to 8GB
* Increased the memory allocation for MACS2 to 8GB and for bamPEFragmentSize to 3GB
* Fixed the scRNA-seq pipeline, which seems to have been broken in 1.1.1

snakePipes 1.1.1
----------------
Expand Down
2 changes: 1 addition & 1 deletion snakePipes/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.1.1'
__version__ = '1.1.2'
1 change: 1 addition & 0 deletions snakePipes/shared/rscripts/CSAW.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env Rscript
## ChIPseq differential binding workflow
.libPaths(R.home("library"))

sampleInfoFilePath <- snakemake@input[["sampleSheet"]] #"samplesheet.tab"
insert_size_metrics <- snakemake@input[["insert_size_metrics"]] # bamPEFragmentSize output
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/shared/rscripts/DB_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
#' @examples
#' readfiles_chip(csvFile = "testBAMs/testSampleSheet.csv", refAllele = "pat")
#'

.libPaths(R.home("library"))

readfiles_chip <- function(sampleSheet, fragment_length, window_size, alleleSpecific = FALSE, pe.param){

# check that not >2 conditions are given
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/DESeq2.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
# args 6 : T/F whether or not the workflow is allele-specific
# args 7 : tx2gene file for salmon --> DESeq mode

.libPaths(R.home("library"))

args = commandArgs(TRUE)


Expand Down
7 changes: 4 additions & 3 deletions snakePipes/shared/rscripts/DESeq2Report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ params:

```{r setup}
#### Libraries needed
.libPaths(R.home('library'))
## Bioconductor
library('DESeq2')
Expand Down Expand Up @@ -458,16 +459,16 @@ plotCounts_gg <- function(i, dds, intgroup) {
## Change in version 1.15.3
## It might not be necessary to have any of this if else, but I'm not
## sure that plotCounts(returnData) will always return the 'group' variable.
if('group' %in% colnames(data)) {
if('condition' %in% colnames(data)) {
data$group <- group
} else {
data <- cbind(data, data.frame('group' = group))
}
ggplot(data, aes(x = group, y = count)) + geom_point() + ylab('Normalized count') + ggtitle(i) + coord_trans(y = "log10") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplot(data, aes(x = group, y = count)) + geom_jitter(width=0.2) + ylab('Normalized count') + ggtitle(i) + coord_trans(y = "log10") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
}
for(i in head(features, nBestFeatures)) {
print(plotCounts_gg(i, dds = dds, intgroup = intgroup))
print(plotCounts_gg(i, dds = dds, intgroup = intgroup[2:length(intgroup)]))
}
```

Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/DE_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#' @examples
#'

.libPaths(R.home("library"))

checktable <- function(countdata = NA, sampleSheet = NA, alleleSpecific = FALSE, salmon_dir = NA, tx2gene_annot = NA) {

## check whether colnames are allele-specific
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/WGBS_QC_report_template.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ params:
---

```{r setup, include=FALSE}
.libPaths(R.home("library"))
knitr::opts_chunk$set(echo = FALSE)
```

Expand Down
4 changes: 3 additions & 1 deletion snakePipes/shared/rscripts/WGBS_stats_report_template.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ title: "`r paste0('WGBS ',params$stat_category,' stats report')`"
---

```{r setup, include=FALSE}
.libPaths(R.home("library"))
knitr::opts_chunk$set(echo = FALSE, out.width = '50%',out.height='50%')
source(params$input_func)
```
Expand Down Expand Up @@ -68,4 +70,4 @@ A volcano plot was produced to visualize the effect of threshold application ont
```{r, fig.cap=get_fig_cap(params$outdir,"volcano")}
volcano_plot<-dir(params$outdir,pattern="*volcano.plot.png",full.names=TRUE)
knitr::include_graphics(volcano_plot)
```
```
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/WGBSpipe.POM.filt.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

#run in R-3.3.1
#a few lines of code from methylCtools bcall2beta by Hovestadt et al. 2014 were retained
#CpG position handling and coverage and Beta calculations by Katarzyna Sikora
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/WGBSpipe.interval_stats.limma.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

#run in R-3.3.1
#set working directory
wdir<-commandArgs(trailingOnly=TRUE)[1]
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/WGBSpipe.metilene_stats.limma.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

#run in R-3.3.1
#set working directory
wdir<-commandArgs(trailingOnly=TRUE)[1]
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/WGBSpipe.prep_data_for_stats.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

#run in R-3.3.1
#set working directory
wdir<-commandArgs(trailingOnly=TRUE)[1]
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/WGBSpipe.singleCpGstats.limma.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

#run in R-3.3.1
#set working directory
wdir<-commandArgs(trailingOnly=TRUE)[1]
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/WGBSstats_functions.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

##to be called by WGBS stats Rscripts

print_sessionInfo<-function(mytext){
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/merge_count_tables.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

library(tools)

args <- commandArgs(trailingOnly=T)
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/merge_featureCounts.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

library(tools)

args <- commandArgs(trailingOnly=T)
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/shared/rscripts/scRNAseq_cell_filter_monocle.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#run in R3.4.0

.libPaths(R.home("library"))

#set working directory
wdir<-commandArgs(trailingOnly=TRUE)[1]
#system(paste0('mkdir -p ',wdir)) #for debugging
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/shared/rscripts/scRNAseq_cell_filter_raceid.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#run in R3.4.0

.libPaths(R.home("library"))

#set working directory
wdir<-commandArgs(trailingOnly=TRUE)[1]
#system(paste0('mkdir -p ',wdir)) #for debugging
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/scRNAseq_monocle_stats_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ This report summarizes statistical analyses of your single cell data stored in `
The distribution of total transcript counts per cell (TPC) in unnormalized data was plotted.

```{r}
.libPaths(R.home("library"))
knitr::include_graphics(file.path(params$outdir,"Expdata.ColumnSums.png"))
```

Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/scRNAseq_raceid_stats_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ This report summarizes statistical analyses of your single cell data stored in `
The distribution of total transcript counts per cell (TPC) in unnormalized data was plotted.

```{r}
.libPaths(R.home("library"))
knitr::include_graphics(file.path(params$outdir,"Expdata.ColumnSums.png"))
```

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#run in R3.4.0

.libPaths(R.home("library"))

#set working directory
wdir<-commandArgs(trailingOnly=TRUE)[1]
#system(paste0('mkdir -p ',wdir)) #for debugging
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#run in R3.4.0

.libPaths(R.home("library"))

#set working directory
wdir<-commandArgs(trailingOnly=TRUE)[1]
#system(paste0('mkdir -p ',wdir)) #for debugging
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/rscripts/sleuth.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.libPaths(R.home("library"))

library("sleuth")
library("dplyr")
#library("biomaRt")
Expand Down
4 changes: 3 additions & 1 deletion snakePipes/shared/rscripts/wasabi.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
.libPaths(R.home("library"))

library(wasabi)

args = commandArgs(trailingOnly=TRUE)

prepare_fish_for_sleuth(args)
prepare_fish_for_sleuth(args)
41 changes: 28 additions & 13 deletions snakePipes/shared/rules/TrimGalore.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,16 +91,31 @@ else:

### FastQC_on_trimmed #######################################################

rule FastQC_on_trimmed:
input:
fastq_dir+"/{sample}{read}.fastq.gz"
output:
"FastQC_trimmed/{sample}{read}_fastqc.html"
log:
out = "FastQC_trimmed/logs/FastQC_trimmed.{sample}{read}.out",
err = "FastQC_trimmed/logs/FastQC_trimmed.{sample}{read}.err"
benchmark:
"FastQC_trimmed/.benchmark/FastQC_trimmed.{sample}{read}.benchmark"
threads: 2
conda: CONDA_SHARED_ENV
shell: "fastqc -o FastQC_trimmed {input} > {log.out} 2> {log.err}"
if paired:
rule FastQC_on_trimmed:
input:
fastq_dir+"/{sample}{read}.fastq.gz"
output:
"FastQC_trimmed/{sample}{read}_fastqc.html"
log:
out = "FastQC_trimmed/logs/FastQC_trimmed.{sample}{read}.out",
err = "FastQC_trimmed/logs/FastQC_trimmed.{sample}{read}.err"
benchmark:
"FastQC_trimmed/.benchmark/FastQC_trimmed.{sample}{read}.benchmark"
threads: 2
conda: CONDA_SHARED_ENV
shell: "fastqc -o FastQC_trimmed {input} > {log.out} 2> {log.err}"
else:
rule FastQC_on_trimmed_SE:
input:
fastq_dir+"/{sample}"+reads[0]+".fastq.gz"
output:
"FastQC_trimmed/{sample}"+reads[0]+"_fastqc.html"
log:
out = "FastQC_trimmed/logs/FastQC_trimmed.{sample}"+reads[0]+".out",
err = "FastQC_trimmed/logs/FastQC_trimmed.{sample}"+reads[0]+".err"
benchmark:
"FastQC_trimmed/.benchmark/FastQC_trimmed.{sample}"+reads[0]+".benchmark"
threads: 2
conda: CONDA_SHARED_ENV
shell: "fastqc -o FastQC_trimmed {input} > {log.out} 2> {log.err}"
4 changes: 2 additions & 2 deletions snakePipes/shared/rules/filter_annotation.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ rule create_annotation_bed:
""" pos=match($0,"transcript_[bio]*type.([^[:space:]]+)",a); if (pos!=0) tt=a[1]; else tt="NA"; """
""" pos=match($0,"gene_name.([^[:space:]]+)",a); if (pos!=0) gna=a[1]; else gna=gid; """
""" pos=match($0,"gene_[bio]*type.([^[:space:]]+)",a); if (pos!=0) gt=a[1]; else gt="NA"; """
""" pos=match($0,"(transcript_support_level.[^[:space:]]+)",a); if (pos!=0) tsl=a[1]; else tsl="transcript_support_level NA"; """
""" pos=match($0,"[[:space:]](level.[^[:space:]]+)",a); if (pos!=0) lvl=a[1] ; else lvl="level NA"; """
""" pos=match($0,"transcript_support_level.([^[:space:]]+)",a); if (pos!=0) tsl=a[1]; else tsl="NA"; """
""" pos=match($0,"[[:space:]]level.([^[:space:]]+)",a); if (pos!=0) lvl=a[1] ; else lvl="NA"; """
""" pos=match($0,"tag.basic"); if (lvl!~"NA"){{if (pos==0) basic="full"; else basic="basic"}} else basic="NA"; """
""" OFS="\\t"; print tid,tna,tt,gid,gna,gt,"gencode",basic,"transcript_support_level",tsl,"level",lvl}}' | """
""" sort | uniq | sort -k1,1) | """
Expand Down
6 changes: 3 additions & 3 deletions snakePipes/shared/rules/multiQC.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ def multiqc_input_check(return_value):
indir += " ".join(expand("HiC_matrices/QCplots/{sample}_QC ", sample = samples))
elif pipeline == "scrna-seq":
if trim:
infiles.append( expand("FastQC_trimmed/{sample}_fastqc.html", sample = samples) )
infiles.append( expand("FastQC_trimmed/{sample}"+reads[0]+"_fastqc.html", sample = samples) )
indir += " FastQC_trimmed "
infiles.append( expand("FastQC/{sample}{read}_fastqc.html", sample = samples, read = reads) )
infiles.append( expand("FastQC/{sample}"+reads[0]+"_fastqc.html", sample = samples) )
indir +=" FastQC "
infiles.append( expand(fastq_dir+"/{sample}.fastq.gz", sample = samples, read = reads) )
infiles.append( expand(fastq_dir+"/{sample}"+reads[0]+".fastq.gz", sample = samples) )
indir += fastq_dir + " "
elif fastqc:
infiles.append( expand("FastQC/{sample}{read}_fastqc.html", sample = samples, read = reads) )
Expand Down
6 changes: 3 additions & 3 deletions snakePipes/shared/rules/scRNAseq.snakefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
### add barcodes from R1 to R2 #########

rule fastq_barcode:
input:
R1 = "FASTQ/{sample}"+reads[0]+".fastq.gz",
R2 = "FASTQ/{sample}"+reads[1]+".fastq.gz"
input: ## remember that we swapped reads[] in internals.snakefile in this workflow!!!
R2 = "FASTQ/{sample}"+reads[0]+".fastq.gz",
R1 = "FASTQ/{sample}"+reads[1]+".fastq.gz"
output:
R2_barcoded = "FASTQ_barcoded/{sample}"+reads[0]+".fastq.gz"
params:
Expand Down
6 changes: 3 additions & 3 deletions snakePipes/workflows/scRNAseq/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ if trim:
def run_Trimming(trim):
if trim:
file_list = [
expand(fastq_dir+"/{sample}.fastq.gz", sample = samples),
expand("FastQC_trimmed/{sample}_fastqc.html", sample = samples)
expand(fastq_dir+"/{sample}"+reads[0]+".fastq.gz", sample = samples),
expand("FastQC_trimmed/{sample}"+reads[0]+"_fastqc.html", sample = samples)
]
return(file_list)
else:
Expand Down Expand Up @@ -139,4 +139,4 @@ rule all:
onsuccess:
cf.cleanLogs(outdir)
if "verbose" in config and config["verbose"]:
print("\n--- scRNAseq-mapcount workflow finished successfully! --------------------------------\n")
print("\n--- scRNAseq workflow finished successfully! --------------------------------\n")
4 changes: 3 additions & 1 deletion snakePipes/workflows/scRNAseq/cluster.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
STAR:
memory: 3200M
memory: 3500M
sc_bam_featureCounts_genomic:
memory: 4G
bamPE_fragment_size:
memory: 3G
combine_sample_counts:
memory: 10G
cluster_cells_raceid:
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/workflows/scRNAseq/internals.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ if not cf.is_paired(infiles,ext,reads):
## After barcode transfer to R2 we have only single end data / R2
## but we need to keep "reads" for rule fastq_barcode
paired = False
## We swap the read extensions because we continue in SE mode, but with R2:
## some rules use a hardcoded reads[0] for SE, so reads[0] must be the R2 extension.
reads = reads[::-1]

### barcode pattern extraction #################################################
pattern = re.compile("[N]+")
Expand Down

0 comments on commit 898986b

Please sign in to comment.