Merge pull request #671 from maxplanck-ie/dev_ksikora2

Dev ksikora2
maxplanck-ie · Aug 21, 2020 · dec4593 · dec4593
2 parents 0706188 + 2e2c38a
commit dec4593
Show file tree

Hide file tree

Showing 12 changed files with 83 additions and 18 deletions.
diff --git a/.ci_stuff/test_dag.sh b/.ci_stuff/test_dag.sh
@@ -111,7 +111,7 @@ WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeO
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 715 ]; then exit 1 ; fi
 # fromBAM and spikein
 WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 840 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 714 ]; then exit 1 ; fi
 WC=`ChIP-seq -d outdir --useSpikeInForNorm --getSizeFactorsFrom TSS --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 591 ]; then exit 1 ; fi
 WC=`ChIP-seq -d outdir --useSpikeInForNorm --getSizeFactorsFrom input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
@@ -168,7 +168,7 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1038 ]; then exit 1 ; fi
 WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" --skipRaceID --splitLib .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1015 ]; then exit 1 ; fi
 WC=`scRNAseq -i PE_input -o output --mode STARsolo --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 957 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 965 ]; then exit 1 ; fi
 WC=`scRNAseq -i PE_input -o output --mode Alevin --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 337 ]; then exit 1 ; fi
 

diff --git a/bin/snakePipes b/bin/snakePipes
@@ -317,7 +317,7 @@ def updateConfig(args):
     """Update the global defaults"""
     baseDir = os.path.dirname(snakePipes.__file__)
     # Load, update and rewrite the default dictionary
-    currentDict = cof.load_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), True)
+    currentDict = cof.load_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), False, "Default Config")
 
     if args.configMode=="manual":
         d = {
@@ -338,13 +338,20 @@ def updateConfig(args):
     elif args.configMode=="recycle":
         oldConfig=args.oldConfig
         if os.path.isfile(oldConfig):
-            d = cof.load_configfile(oldConfig, True)
+            d = cof.load_configfile(oldConfig, False, "Old Config")
+            if args.organismsDir:
+                od = {'organismsDir': args.organismsDir}
+                d.update(od)
+            if args.clusterConfig:
+                od = {'clusterConfig': args.clusterConfig}
+                d.update(od)
             if not currentDict.keys() & d.keys():
                 sys.exit("The old and the new config have no matching keys!!!\n")
         else:
             sys.exit("Config file not found\n")
     updatedDict=cof.merge_dicts(currentDict, d)
     cof.write_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), updatedDict)
+    newDict=cof.load_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), True, "Final Updated Config")
 
 
 def version():

diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
@@ -1,6 +1,6 @@
 package:
   name: snakepipes
-  version: 2.1.2
+  version: 2.2.0
 
 source:
   path: ../
@@ -14,7 +14,7 @@ requirements:
     - python >=3
   run:
     - python >=3.7
-    - snakemake >=5.13
+    - snakemake ==5.18.0
     - pandas
     - graphviz
     - fuzzywuzzy

diff --git a/docs/content/News.rst b/docs/content/News.rst
@@ -1,24 +1,30 @@
 snakePipes News
 ===============
 
-snakePipes 2.x.y
+snakePipes 2.2.0
 ----------------
 * Added Alevin mode in scRNA workflow
 * Added a new conda environment used to call AlevinQC.
 * Added filtering of empty drops with Dropletutils to scRNA-seq mode STARsolo
 * Added spikein normalization to ChIPseq workflow
 * Added hybrid genome creation to createIndices
+* Added STARsolo report for all samples to STARsolo output folder
 * FASTQ1 and FASTQ2 are not localrules anymore due to buggy logging
 * Included optional differential splicing analysis using rmats within mRNA-seq workflow
 * Symlinks in the output path are relative 
 * Increased BBmap version
 * Increased STAR version to 2.7.4a in scRNAseq, noncoding-RNA-seq and mRNA-seq workflows
+* Pinned snakemake version to 5.18.0 due to a bug in DAG handling
 * Minor changes to shared FastQC and multiQC rule with regards to scRNA-seq workflow.
 * Fixed issue with missing input for running the DNA-mapping Snakefile
 * Fixed rule TrimGalore for single end reads
 * deepTools heatmaps for differentially bound regions are now ordered by sample sheet condition
 * Genrich is now run on namesorted bams
 * Workflow help message now points to example sampleSheet on GitHub
+* organismsDir and clusterConfig can now be updated with snakePipes config mode "recycle"
+
+.. note::
+   Please be aware that this version requires regeneration of STAR indices!
 
 snakePipes 2.1.2
 ----------------

diff --git a/docs/content/setting_up.rst b/docs/content/setting_up.rst
@@ -96,6 +96,9 @@ This would show the locations of:
  * **organisms/<organism>.yaml** : Defines genome indices and annotations for various organisms. See :ref:`organisms`
  * Workflow-specific defaults : Defines default options for our command line wrappers. See :ref:`workflowOpts`
 
+It is a good idea to keep a copy of your defaults.yaml, cluster.yaml and the whole organism folder in a dedicated location, e.g. a folder *outside the snakePipes installation folder* named "snakePipes_configs".
+You can configure snakePipes to use these files after a fresh installation or update with ``snakePipes config --organismsDir my_organisms_dir --clusterConfig my_cluster_config``. This will also work if you add ``--configMode recycle``.
+
 
 .. _conda:
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -52,7 +52,7 @@ snakePipes is going to move to mamba in the future.
 
     snakePipes config --help
 
-.. note:: If you have a copy of a `shared/defaults.yaml` with the necessary paths configured (i.e. from a previous installation), you can pass it to snakePipes config with `--oldConfig` and `--configMode recycle` instead of providing all the paths manually again. Config keys have to match for this to work.
+.. note:: If you have a copy of a ``shared/defaults.yaml`` with the necessary paths configured (e.g. from a previous installation), you can pass it to snakePipes config with ``--oldConfig`` and ``--configMode recycle`` instead of providing all the paths manually again. Config keys have to match for this to work. In the same way, you can pass your external organism yaml folder with ``--organismsDir`` or cluster config with ``--clusterConfig``.
 
 * Download example fastq files for the human genome `here <https://zenodo.org/record/3707259>`_
 

diff --git a/snakePipes/__init__.py b/snakePipes/__init__.py
@@ -1 +1 @@
-__version__ = '2.1.2'
+__version__ = '2.2.0'
diff --git a/snakePipes/shared/rscripts/scRNAseq_EmptyDrops.R b/snakePipes/shared/rscripts/scRNAseq_EmptyDrops.R
@@ -73,9 +73,13 @@ filter_empty_cells<-function(folder,sample){
 
 }
 
-l<-mapply(SIMPLIFY=FALSE, function(X,Y) filter_empty_cells(X,Y),X=in_dirs,Y=samples)
-names(l)<-samples
-s<-merge(x=l[[1]],y=unlist(l[[2:length(l)]]),add.cell.ids=names(l))
+if(length(in_dirs)>1){
+    l<-mapply(SIMPLIFY=FALSE, function(X,Y) filter_empty_cells(X,Y),X=in_dirs,Y=samples)
+    names(l)<-samples
+    s<-merge(x=l[[1]],y=unlist(l[[2:length(l)]]),add.cell.ids=names(l))
+}else{
+    s<-filter_empty_cells(in_dirs,samples)
+}
 
 outfile<-file.path(wdir,basename(snakemake@output[["seurat"]]))
 saveRDS(s,file=outfile)

diff --git a/snakePipes/shared/rscripts/scRNAseq_report.R b/snakePipes/shared/rscripts/scRNAseq_report.R
@@ -0,0 +1,29 @@
+sink(snakemake@log[["out"]])
+.libPaths(R.home("library"))
+
+wdir<-snakemake@params[["wdir"]]
+if (!dir.exists(wdir)) dir.create(wdir)
+setwd(wdir)
+message(sprintf("working directory is %s",getwd()))
+
+rdir<-snakemake@params[["input"]]
+rshort<-snakemake@params[["samples"]]
+
+rl<-vector("list",)
+for(i in seq_along(rdir)){
+    tabi<-read.table(rdir[i],header=FALSE,sep=",",quote="",as.is=TRUE)
+    colnames(tabi)<-c("Metric",rshort[i])
+    rl[[i]]<-tabi
+}
+
+rdf <- Reduce(function(x, y, ...) merge(x, y, all = TRUE, by="Metric", sort=FALSE, ...),rl)
+
+outf<-file.path(wdir,basename(snakemake@output[["report"]]))
+write.table(rdf,outf,row.names=FALSE,quote=FALSE,sep="\t")
+
+message('done all')
+sink()
+
+sink("sessionInfo.txt")
+sessionInfo()
+sink()
diff --git a/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile b/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile
@@ -15,7 +15,8 @@ rule STARsolo:
         filtered_counts = "STARsolo/{sample}/{sample}.Solo.out/Gene/filtered/matrix.mtx",
         filtered_bc = "STARsolo/{sample}/{sample}.Solo.out/Gene/filtered/barcodes.tsv",
         raw_features = "STARsolo/{sample}/{sample}.Solo.out/Gene/raw/features.tsv",
-        filtered_features = "STARsolo/{sample}/{sample}.Solo.out/Gene/filtered/features.tsv"
+        filtered_features = "STARsolo/{sample}/{sample}.Solo.out/Gene/filtered/features.tsv",
+        summary = "STARsolo/{sample}/{sample}.Solo.out/Gene/Summary.csv"
     log: "STARsolo/logs/{sample}.log"
     params:
         alignerOptions = str(alignerOptions or ''),
@@ -68,6 +69,20 @@ rule STARsolo:
         rm -rf $MYTEMP
          """
 
+rule STARsolo_report:
+    input:  expand("STARsolo/{sample}/{sample}.Solo.out/Gene/Summary.csv",sample=samples)
+    output:
+        report = "STARsolo/Report.tsv"
+    params:
+        wdir = outdir + "/STARsolo",
+        input = lambda wildcards,input: [ os.path.join(outdir,x) for x in input ],
+        samples = samples
+    log: 
+        out = "STARsolo/logs/Report.out"
+    conda: CONDA_seurat3_ENV
+    script: "../rscripts/scRNAseq_report.R"
+
+
 rule filter_bam:
     input:
         bamfile = aligner+"/{sample}.sorted.bam",

diff --git a/snakePipes/workflows/ChIP-seq/Snakefile b/snakePipes/workflows/ChIP-seq/Snakefile
@@ -114,12 +114,12 @@ def run_deepTools_ChIP():
                 file_list.append(["deepTools_ChIP/bamCompare/"+chip_sample+".filtered.subtract."+control_name+".bw"])
             if bigWigType == "log2ratio" or bigWigType == "both":
                 file_list.append(["deepTools_ChIP/bamCompare/"+chip_sample+".filtered.log2ratio.over_"+control_name+".bw"])
-        elif useSpikeInForNorm and getSizeFactorsFrom == "genome":
+        #elif useSpikeInForNorm and getSizeFactorsFrom == "genome":
             # get bigwigtype
-            if bigWigType == "subtract" or bigWigType == "both":
-                file_list.append(expand("split_deepTools_ChIP/bamCompare/"+chip_sample+".subtract."+control_name+".scaledBY{part}.bw",part=part))
-            if bigWigType == "log2ratio" or bigWigType == "both":
-                file_list.append(expand("split_deepTools_ChIP/bamCompare/"+chip_sample+".log2ratio.over_"+control_name+".scaledBY{part}.bw",part=part))
+            #if bigWigType == "subtract" or bigWigType == "both":
+            #    file_list.append(expand("split_deepTools_ChIP/bamCompare/"+chip_sample+".subtract."+control_name+".scaledBY{part}.bw",part=part))
+            #if bigWigType == "log2ratio" or bigWigType == "both":
+            #    file_list.append(expand("split_deepTools_ChIP/bamCompare/"+chip_sample+".log2ratio.over_"+control_name+".scaledBY{part}.bw",part=part))
     return(file_list)
 
 def run_deepTools_allelic():

diff --git a/snakePipes/workflows/scRNAseq/Snakefile b/snakePipes/workflows/scRNAseq/Snakefile
@@ -167,6 +167,7 @@ elif mode=="STARsolo":
     rule all:
         input:
             expand("STARsolo/{sample}.sorted.bam",sample = samples),
+            "STARsolo/Report.tsv",
             "Sambamba/flagstat_report_all.tsv",
             run_deeptools_qc(),
             "deepTools_qc/bamPEFragmentSize/fragmentSize.metric.tsv",