Merge pull request #719 from maxplanck-ie/dev_ksikora2

multiple comparisons for RNAseq
maxplanck-ie · Nov 2, 2020 · 5d226d3 · 5d226d3
2 parents 3ca8345 + 17791b4
commit 5d226d3
Show file tree

Hide file tree

Showing 19 changed files with 483 additions and 33 deletions.
diff --git a/.ci_stuff/test_dag.sh b/.ci_stuff/test_dag.sh
@@ -184,7 +184,7 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 331 ]; then exit 1 ; fi
 WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 826 ]; then exit 1 ; fi
 WC=`mRNA-seq -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
-if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 841 ]; then exit 1 ; fi
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 842 ]; then exit 1 ; fi
 WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 602 ]; then exit 1 ; fi
 WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
@@ -207,6 +207,9 @@ WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 909 ]; then exit 1 ; fi
 WC=`mRNA-seq -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 593 ]; then exit 1 ; fi
+#multiple comparison groups
+WC=`mRNA-seq --mode alignment,alignment-free -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 867 ]; then exit 1 ; fi
 #allelic
 WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1450 ]; then exit 1 ; fi
@@ -222,7 +225,9 @@ WC=`noncoding-RNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleS
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 582 ]; then exit 1 ; fi
 WC=`noncoding-RNA-seq -i BAM_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
 if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 503 ]; then exit 1 ; fi
-
+#multiple comparison groups
+WC=`noncoding-RNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
+if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 689 ]; then exit 1 ; fi
 
 # scRNA-seq
 #WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`

diff --git a/.ci_stuff/test_sampleSheet_multiComp.tsv b/.ci_stuff/test_sampleSheet_multiComp.tsv
@@ -0,0 +1,7 @@
+name	condition	group
+sample1	sample1	Control	All
+sample2	sample2	Control	All
+sample3	sample3	Treatment	Group1
+sample4	sample4	Treatment	Group1
+sample5	sample5	Treatment	Group2
+sample6	sample6	Treatment	Group2
diff --git a/docs/content/News.rst b/docs/content/News.rst
@@ -4,8 +4,12 @@ snakePipes News
 snakePipes x.y.z
 ----------------
 
+* Added support for multiple pairwise comparisons for DESeq2, sleuth, and rMats in the mRNA-seq workflow, as well as for DESeq2 in the noncoding-RNA-seq workflow.
 * Loompy from conda is now used in mode STARsolo in scRNA-seq workflow.
 * Added bamExt to mRNA-seq and noncoding-RNA-seq commandline arguments.
+* Added multi-thread support to rMats in mRNA-seq workflow.
+* Fixed deepTools GC bias command with SE reads.
+
 
 snakePipes 2.3.1
 ----------------

diff --git a/docs/content/workflows/mRNA-seq.rst b/docs/content/workflows/mRNA-seq.rst
@@ -129,6 +129,23 @@ Complex designs with blocking factors
 
 If the user provides additional columns between 'name' and 'condition' in the sample sheet, the variables stored there will be used as blocking factors in the order they appear in the sample sheet. Eg. if the first line of your sample sheet looks like 'name	batch	condition', this will translate into a formula ``batch + condition``. 'condition' has to be the final column and it will be used for any statistical inference.
 
+Multiple pairwise comparisons
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The user may specify multiple groups of independent comparisons by providing a 'group' column after the 'condition' column. This will cause the sample sheet to be split into the groups defined in this column, and a corresponding number of independent pairwise comparisons will be run, one for each split sheet, and placed in separate output folders named accordingly. This will be applied to DESeq2, sleuth, and rMats pairwise comparisons as requested by the user.
+Specifying a value of 'All' in the 'group' column will cause that sample group to be used in all pairwise comparisons, e.g. if the same set of controls should be used for several different treatment groups.
+
+An example sample sheet with the group information provided looks like this:
+
+name	condition	group
+sample1	Control		All
+sample2	Control		All
+sample3	Treatment	Group1
+sample4	Treatment	Group1
+sample5	Treatment	Group2
+sample6	Treatment	Group2
+
+
 Analysis modes
 --------------
 

diff --git a/docs/content/workflows/noncoding-RNA-seq.rst b/docs/content/workflows/noncoding-RNA-seq.rst
@@ -102,6 +102,22 @@ Complex designs with blocking factors
 
 If the user provides additional columns between 'name' and 'condition' in the sample sheet, the variables stored there will be used as blocking factors in the order they appear in the sample sheet. Eg. if the first line of your sample sheet looks like 'name	batch	condition', this will translate into a formula ``batch + condition``. 'condition' has to be the final column and it will be used for any statistical inference.
 
+Multiple pairwise comparisons
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The user may specify multiple groups of independent comparisons by providing a 'group' column after the 'condition' column. This will cause the sample sheet to be split into the groups defined in this column, and a corresponding number of independent pairwise comparisons will be run, one for each split sheet, and placed in separate output folders named accordingly. This will be applied to DESeq2 pairwise comparison.
+Specifying a value of 'All' in the 'group' column will cause that sample group to be used in all pairwise comparisons, e.g. if the same set of controls should be used for several different treatment groups.
+
+An example sample sheet with the group information provided looks like this:
+
+name	condition	group
+sample1	Control		All
+sample2	Control		All
+sample3	Treatment	Group1
+sample4	Treatment	Group1
+sample5	Treatment	Group2
+sample6	Treatment	Group2
+
 Analysis modes
 --------------
 

diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py
@@ -22,6 +22,7 @@ def set_env_yamls():
     return {'CONDA_SHARED_ENV': 'envs/shared.yaml',
             'CONDA_CREATE_INDEX_ENV': 'envs/createIndices.yaml',
             'CONDA_RNASEQ_ENV': 'envs/rna_seq.yaml',
+            'CONDA_RMATS_ENV': 'envs/rMats.yaml',
             'CONDA_scRNASEQ_ENV': 'envs/sc_rna_seq.yaml',
             'CONDA_seurat3_ENV': 'envs/sc_rna_seq_seurat3.yaml',
             'CONDA_loompy_ENV': 'envs/sc_rna_seq_loompy.yaml',
@@ -230,6 +231,101 @@ def check_replicates(sample_info_file):
     return True
 
 
+def isMultipleComparison(sampleSheet):
+    f = open(sampleSheet)
+    nCols = None
+    d = dict()
+    for idx, line in enumerate(f):
+        cols = line.strip().split("\t")
+        if idx == 0:
+            if "group" not in cols:
+                return False
+            comparisonGroupCol = cols.index("group")
+            nCols = len(cols)
+            continue
+        elif idx == 1:
+            # Sometimes there's a column of row names, which lack a header
+            if len(cols) - 1 == nCols:
+                comparisonGroupCol += 1
+        if not len(line.strip()) == 0:
+            if cols[comparisonGroupCol] not in d:
+                d[cols[comparisonGroupCol]] = 0
+            d[cols[comparisonGroupCol]] += 1
+    f.close()
+
+    if len(d) > 1:
+        return True
+
+
+def splitSampleSheet(sampleSheet, destination_pfx):
+    f = open(sampleSheet)
+    conditionCol = None
+    nameCol = None
+    comparisonGroupCol = None
+    nCols = None
+    d = dict()
+    for idx, line in enumerate(f):
+        cols = line.strip().split("\t")
+        if idx == 0:
+            conditionCol = cols.index("condition")
+            nameCol = cols.index("name")
+            comparisonGroupCol = cols.index("group")
+            nCols = len(cols)
+            continue
+        elif idx == 1:
+            # Sometimes there's a column of row names, which lack a header
+            if len(cols) != nCols and len(cols) - 1 != nCols:
+                sys.exit("ERROR: there's a mismatch between the number of columns in the header and body of {}!\n".format(sampleSheet))
+            if len(cols) - 1 == nCols:
+                conditionCol += 1
+                nameCol += 1
+                comparisonGroupCol += 1
+        if not len(line.strip()) == 0:
+            if cols[comparisonGroupCol] not in d:
+                d[cols[comparisonGroupCol]] = []
+            d[cols[comparisonGroupCol]].append([cols[nameCol], cols[conditionCol]])
+
+    f.close()
+    for k in d.keys():
+        if k != "All" and "All" in d.keys():
+            d[k].extend(d['All'])
+            outfile = os.path.join("splitSampleSheets", '.'.join([os.path.basename(destination_pfx), k, 'tsv']))
+            with open(outfile, 'w') as of:
+                of.write('name\tcondition\n')
+                for item in d[k]:
+                    of.write('\t'.join(item) + '\n')
+
+    return
+
+
+def returnComparisonGroups(sampleSheet):
+    f = open(sampleSheet)
+    nCols = None
+    d = dict()
+    for idx, line in enumerate(f):
+        cols = line.strip().split("\t")
+        if idx == 0:
+            if "group" not in cols:
+                return False
+            comparisonGroupCol = cols.index("group")
+            nCols = len(cols)
+            continue
+        elif idx == 1:
+            # Sometimes there's a column of row names, which lack a header
+            if len(cols) - 1 == nCols:
+                comparisonGroupCol += 1
+        if not len(line.strip()) == 0:
+            if cols[comparisonGroupCol] not in d:
+                d[cols[comparisonGroupCol]] = 0
+            d[cols[comparisonGroupCol]] += 1
+    f.close()
+
+    if "All" in d.keys():
+        del d['All']
+
+    return d.keys()
+
+
 def sampleSheetGroups(sampleSheet):
     """
     Parse a sampleSheet and return a dictionary with keys the group and values the sample names

diff --git a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile
@@ -0,0 +1,91 @@
+## function to get the name of the samplesheet and extend the name of the folder DESeq2 to DESeq2_[name]
+def get_outdir(folder_name,sampleSheet):
+    sample_name = os.path.splitext(os.path.basename(str(sampleSheet)))[0]
+
+    return("{}_{}".format(folder_name, sample_name))
+
+checkpoint split_sampleSheet:
+    input:
+        sampleSheet = sampleSheet
+    output:
+        splitSheets = os.path.join("splitSampleSheets",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")
+    params:
+        splitSheetPfx = os.path.join("splitSampleSheets",os.path.splitext(os.path.basename(str(sampleSheet)))[0])
+    run:
+        if isMultipleComparison:
+            cf.splitSampleSheet(input.sampleSheet,params.splitSheetPfx)
+
+
+## DESeq2 (on featureCounts)
+rule DESeq2:
+    input:
+        counts_table = lambda wildcards : "featureCounts/counts_allelic.tsv" if 'allelic-mapping' in mode else "featureCounts/counts.tsv",
+        sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output,
+        symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file
+    output:
+         "{}/DESeq2.session_info.txt".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
+    benchmark:
+        "{}/.benchmark/DESeq2.featureCounts.benchmark".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
+    params:
+        script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"),
+        outdir = lambda wildcards,input: get_outdir("DESeq2",input.sampleSheet),
+        sampleSheet = lambda wildcards,input: os.path.join(outdir,str(input.sampleSheet)),
+        fdr = 0.05,
+        importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
+        allele_info = lambda wildcards : 'TRUE' if 'allelic-mapping' in mode else 'FALSE',
+        tx2gene_file = 'NA',
+        rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd")
+    log:
+        out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")),
+        err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
+    conda: CONDA_RNASEQ_ENV
+    shell:
+        "cd {params.outdir} && "
+        "Rscript {params.script} "
+        "{params.sampleSheet} " # 1
+        "../{input.counts_table} " # 2
+        "{params.fdr} " # 3
+        "../{input.symbol_file} " # 4
+        "{params.importfunc} " # 5
+        "{params.allele_info} " # 6
+        "{params.tx2gene_file} " # 7
+        "{params.rmdTemplate} " # 8
+        " > ../{log.out} 2> ../{log.err}"
+
+
+## DESeq2 (on Salmon)
+rule DESeq2_Salmon:
+    input:
+        counts_table = "Salmon/counts.transcripts.tsv",
+        sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output,
+        tx2gene_file = "Annotation/genes.filtered.t2g",
+        symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file
+    output:
+        "{}/DESeq2.session_info.txt".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
+    log:
+        out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")),
+        err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
+    benchmark:
+        "{}/.benchmark/DESeq2.Salmon.benchmark".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
+    params:
+        script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"),
+        outdir = lambda wildcards,input: get_outdir("DESeq2_Salmon",input.sampleSheet),
+        sampleSheet = lambda wildcards,input: os.path.join(outdir,str(input.sampleSheet)),
+        fdr = 0.05,
+        importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
+        allele_info = 'FALSE',
+        tx2gene_file = "Annotation/genes.filtered.t2g",
+        rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd")
+    conda: CONDA_RNASEQ_ENV
+    shell:
+        "cd {params.outdir} && "
+        "Rscript {params.script} "
+        "{params.sampleSheet} " # 1
+        "../{input.counts_table} " # 2
+        "{params.fdr} " # 3
+        "../{input.symbol_file} " # 4
+        "{params.importfunc} " # 5
+        "{params.allele_info} " # 6
+        "../{input.tx2gene_file} " # 7
+        "{params.rmdTemplate} " # 8
+        " > ../{log.out} 2> ../{log.err}"
diff --git a/snakePipes/shared/rules/DESeq2.snakefile → .../shared/rules/DESeq2.singleComp.snakefile b/snakePipes/shared/rules/DESeq2.snakefile → .../shared/rules/DESeq2.singleComp.snakefile
diff --git a/snakePipes/shared/rules/envs/rMats.yaml b/snakePipes/shared/rules/envs/rMats.yaml
@@ -0,0 +1,8 @@
+name: snakepipes_rMats_environment_0.1
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - python >= 3
+ - rmats = 4.1.0
+ - samtools = 1.10
diff --git a/snakePipes/shared/rules/envs/rna_seq.yaml b/snakePipes/shared/rules/envs/rna_seq.yaml
@@ -3,6 +3,7 @@ channels:
  - conda-forge
  - bioconda
 dependencies:
+# - python >= 3
  - bedtools = 2.29.2
  - samtools = 1.10
  - subread = 2.0.0
@@ -24,4 +25,4 @@ dependencies:
  - bioconductor-tximport = 1.14.0
  - r-knitcitations
  - bioconductor-apeglm
- - rmats
+# - rmats = 4.1.0
diff --git a/snakePipes/shared/rules/rMats.multipleComp.snakefile b/snakePipes/shared/rules/rMats.multipleComp.snakefile
@@ -0,0 +1,70 @@
+## function to get the name of the samplesheet and extend the name of the folder. (Same logic as for DESeq2).
+def get_outdir(folder_name,sampleSheet):
+    sample_name = os.path.splitext(os.path.basename(str(sampleSheet)))[0]
+    return("{}_{}".format(folder_name, sample_name))
+## reWrap libType to string to use as flag for rmats rather than int.
+def wrap_libType(libType):
+    dic_libType = {0:"fr-unstranded",1:"fr-firststrand",2:"fr-secondstrand"}
+    return dic_libType[libType]
+
+## requires the checkpoint rule defined in DESeq2.multipleComp.snakefile
+#rMatsConds = cf.sampleSheetGroups(sampleSheet)
+
+def generate_b1_b2(sampleSheet,which_b):
+    if os.path.isfile(sampleSheet):
+        rMatsConds = cf.sampleSheetGroups(sampleSheet)
+        if which_b == "b1":
+            return ",".join(["filtered_bam/" + s for s in [s + ".filtered.bam" for s in rMatsConds[list(rMatsConds)[0]]]])
+        else:
+            return ",".join(["filtered_bam/" + s for s in [s + ".filtered.bam" for s in rMatsConds[list(rMatsConds)[1]]]])
+    else:
+        return ""
+
+def get_s1(sampleSheet):
+    if os.path.isfile(sampleSheet):
+        rMatsConds = cf.sampleSheetGroups(sampleSheet)
+        return ["filtered_bam/" + s for s in [s + ".filtered.bam" for s in rMatsConds[list(rMatsConds)[0]]]][0]
+    else:
+        return ""
+
+rule createInputcsv:
+    input:
+        bams = expand("filtered_bam/{sample}.filtered.bam.bai", sample=samples),
+        sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output
+    output:
+        b1out = "rMats_{}/b1.csv".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
+        b2out = "rMats_{}/b2.csv".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}")
+    params:
+        b1 = lambda wildcards,input: generate_b1_b2(str(input.sampleSheet),"b1"),
+        b2 = lambda wildcards,input: generate_b1_b2(str(input.sampleSheet),"b2")
+    shell: """
+        echo '{params.b1}' > {output.b1out}
+        echo '{params.b2}' > {output.b2out}
+"""
+
+rule rMats:
+    input:
+        b1 = "rMats_{}/b1.csv".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
+        b2 = "rMats_{}/b2.csv".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
+        sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output
+    output:
+        "rMats_{}/RI.MATS.JCEC.txt".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}")
+    params:
+        s1 = lambda wildcards,input: get_s1(str(input.sampleSheet)),
+        readLen = "rMats_{}/readlength.txt".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
+        gtf = genes_gtf,
+        od = "rMats_{}".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
+        end = "paired" if pairedEnd else "single",
+        libType = wrap_libType(libraryType),
+        tempDir = tempDir,
+    log: "rMats_{}/rMats.log".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}")
+    threads: 4
+    conda: CONDA_RMATS_ENV
+    shell:"""
+        TMPDIR={params.tempDir}
+        MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX);
+        set +o pipefail;
+        readLen=$(samtools view {params.s1} | awk \'{{print length($10)}}\' | head -10000 | awk \'{{ sum += $1 }} END {{ if (NR > 0) print sum / NR }}\')
+        rmats.py --gtf {params.gtf} --b1 {input.b1} --b2 {input.b2} --od {params.od} --tmp $MYTEMP -t {params.end} --libType {params.libType} --readLength $readLen --variable-read-length --nthread {threads} --tstat {threads} 2> {log};
+        rm -rf $MYTEMP
+"""