-
Notifications
You must be signed in to change notification settings - Fork 85
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #719 from maxplanck-ie/dev_ksikora2
multiple comparisons for RNAseq
- Loading branch information
Showing
19 changed files
with
483 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
name condition group | ||
sample1 sample1 Control All | ||
sample2 sample2 Control All | ||
sample3 sample3 Treatment Group1 | ||
sample4 sample4 Treatment Group1 | ||
sample5 sample5 Treatment Group2 | ||
sample6 sample6 Treatment Group2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
## Build an output folder name by appending the sample sheet's base name (without extension) to the folder name, e.g. DESeq2 -> DESeq2_[name]
def get_outdir(folder_name, sampleSheet):
    """Return ``folder_name`` extended with the base name of ``sampleSheet``.

    The directory part and the last file extension of ``sampleSheet`` are
    dropped, so ``get_outdir("DESeq2", "sheets/exp.tsv")`` is ``"DESeq2_exp"``.
    ``sampleSheet`` is converted with ``str()`` first, so path-like objects
    are accepted as well.
    """
    sheet_file = os.path.basename(str(sampleSheet))
    sheet_stem, _extension = os.path.splitext(sheet_file)
    return "{}_{}".format(folder_name, sheet_stem)
|
||
# Checkpoint producing the per-comparison-group sample sheets that downstream
# rules look up through checkpoints.split_sampleSheet.get(compGroup=...).
checkpoint split_sampleSheet:
    input:
        sampleSheet = sampleSheet
    output:
        # splitSampleSheets/<sheet base name without extension>.{compGroup}.tsv
        splitSheets = os.path.join("splitSampleSheets",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")
    params:
        # Same path as the output, minus the ".{compGroup}.tsv" suffix.
        splitSheetPfx = os.path.join("splitSampleSheets",os.path.splitext(os.path.basename(str(sampleSheet)))[0])
    run:
        # Nothing is written unless isMultipleComparison is truthy.
        # NOTE(review): cf.splitSampleSheet presumably writes one
        # <prefix>.<group>.tsv per comparison group - confirm in its definition.
        if isMultipleComparison:
            cf.splitSampleSheet(input.sampleSheet,params.splitSheetPfx)
|
||
|
||
## DESeq2 (on featureCounts) | ||
# Runs the DESeq2 R script on the featureCounts table, once per {compGroup},
# using the split sample sheet produced by the split_sampleSheet checkpoint.
rule DESeq2:
    input:
        # The allelic counts table is used when 'allelic-mapping' is in mode.
        counts_table = lambda wildcards : "featureCounts/counts_allelic.tsv" if 'allelic-mapping' in mode else "featureCounts/counts.tsv",
        # Output of the checkpoint for the requested comparison group.
        sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output,
        symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file
    output:
        # get_outdir() drops the directory and the trailing ".tsv", so this is
        # DESeq2_<sheet base name>.{compGroup}/DESeq2.session_info.txt
        "{}/DESeq2.session_info.txt".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
    benchmark:
        "{}/.benchmark/DESeq2.featureCounts.benchmark".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
    params:
        script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"),
        # Folder name derived from the split sheet; the shell command cds into it.
        outdir = lambda wildcards,input: get_outdir("DESeq2",input.sampleSheet),
        # NOTE(review): `outdir` here is not params.outdir but a name resolved
        # from the enclosing scope - presumably the pipeline's output directory;
        # confirm where it is defined.
        sampleSheet = lambda wildcards,input: os.path.join(outdir,str(input.sampleSheet)),
        fdr = 0.05,
        importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
        allele_info = lambda wildcards : 'TRUE' if 'allelic-mapping' in mode else 'FALSE',
        tx2gene_file = 'NA',
        rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd")
    log:
        out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")),
        err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
    conda: CONDA_RNASEQ_ENV
    # The command runs inside params.outdir, so input and log paths are
    # prefixed with "../". The trailing "# N" comments number the positional
    # arguments handed to the R script.
    shell:
        "cd {params.outdir} && "
        "Rscript {params.script} "
        "{params.sampleSheet} " # 1
        "../{input.counts_table} " # 2
        "{params.fdr} " # 3
        "../{input.symbol_file} " # 4
        "{params.importfunc} " # 5
        "{params.allele_info} " # 6
        "{params.tx2gene_file} " # 7
        "{params.rmdTemplate} " # 8
        " > ../{log.out} 2> ../{log.err}"
|
||
|
||
## DESeq2 (on Salmon) | ||
# Runs the same DESeq2 R script on the Salmon transcript counts, once per
# {compGroup}, writing into a DESeq2_Salmon_<sheet>.{compGroup} folder.
rule DESeq2_Salmon:
    input:
        counts_table = "Salmon/counts.transcripts.tsv",
        # Output of the split_sampleSheet checkpoint for this comparison group.
        sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output,
        tx2gene_file = "Annotation/genes.filtered.t2g",
        symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file
    output:
        # get_outdir() drops the directory and the trailing ".tsv", so this is
        # DESeq2_Salmon_<sheet base name>.{compGroup}/DESeq2.session_info.txt
        "{}/DESeq2.session_info.txt".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
    log:
        out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")),
        err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
    benchmark:
        "{}/.benchmark/DESeq2.Salmon.benchmark".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))
    params:
        script=os.path.join(maindir, "shared", "rscripts", "DESeq2.R"),
        # Folder name derived from the split sheet; the shell command cds into it.
        outdir = lambda wildcards,input: get_outdir("DESeq2_Salmon",input.sampleSheet),
        # NOTE(review): `outdir` here is not params.outdir but a name resolved
        # from the enclosing scope - presumably the pipeline's output directory;
        # confirm where it is defined.
        sampleSheet = lambda wildcards,input: os.path.join(outdir,str(input.sampleSheet)),
        fdr = 0.05,
        importfunc = os.path.join(maindir, "shared", "rscripts", "DE_functions.R"),
        allele_info = 'FALSE',
        # Not referenced by the shell command below, which passes
        # input.tx2gene_file as argument 7 instead.
        tx2gene_file = "Annotation/genes.filtered.t2g",
        rmdTemplate = os.path.join(maindir, "shared", "rscripts", "DESeq2Report.Rmd")
    conda: CONDA_RNASEQ_ENV
    # The command runs inside params.outdir, so input and log paths are
    # prefixed with "../". The trailing "# N" comments number the positional
    # arguments handed to the R script.
    shell:
        "cd {params.outdir} && "
        "Rscript {params.script} "
        "{params.sampleSheet} " # 1
        "../{input.counts_table} " # 2
        "{params.fdr} " # 3
        "../{input.symbol_file} " # 4
        "{params.importfunc} " # 5
        "{params.allele_info} " # 6
        "../{input.tx2gene_file} " # 7
        "{params.rmdTemplate} " # 8
        " > ../{log.out} 2> ../{log.err}"
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
name: snakepipes_rMats_environment_0.1 | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
dependencies: | ||
- python >= 3 | ||
- rmats = 4.1.0 | ||
- samtools = 1.10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
## function to get the name of the samplesheet and extend the name of the folder. (Same logic as for DESeq2). | ||
def get_outdir(folder_name, sampleSheet):
    """Return ``folder_name`` joined by "_" with the sample sheet's base name.

    Only the file name of ``sampleSheet`` is used, and its last extension is
    removed: ``get_outdir("rMats", "sheets/exp.tsv")`` is ``"rMats_exp"``.
    """
    sheet_stem, _extension = os.path.splitext(os.path.basename(str(sampleSheet)))
    return "{}_{}".format(folder_name, sheet_stem)
## Convert the integer libType into the string value used for the rMATS --libType flag.
def wrap_libType(libType):
    """Translate the numeric library type (0, 1 or 2) into its string name.

    0 -> "fr-unstranded", 1 -> "fr-firststrand", 2 -> "fr-secondstrand".
    Any other value raises ``KeyError``.
    """
    strand_names = ("fr-unstranded", "fr-firststrand", "fr-secondstrand")
    # A dict (not tuple indexing) so that unknown codes raise KeyError.
    code_to_name = dict(enumerate(strand_names))
    return code_to_name[libType]
|
||
## requires the checkpoint rule defined in DESeq2.multipleComp.snakefile | ||
#rMatsConds = cf.sampleSheetGroups(sampleSheet) | ||
|
||
def generate_b1_b2(sampleSheet, which_b):
    """Return the comma-separated filtered BAM paths of one sample-sheet group.

    ``which_b == "b1"`` selects the first group returned by
    ``cf.sampleSheetGroups``; any other value selects the second group. Each
    sample name ``s`` becomes ``filtered_bam/<s>.filtered.bam``.

    An empty string is returned when ``sampleSheet`` is not an existing file.
    """
    if not os.path.isfile(sampleSheet):
        return ""

    groups = cf.sampleSheetGroups(sampleSheet)
    group_position = 0 if which_b == "b1" else 1
    group_key = list(groups)[group_position]
    bam_paths = ["filtered_bam/" + (name + ".filtered.bam") for name in groups[group_key]]
    return ",".join(bam_paths)
|
||
def get_s1(sampleSheet):
    """Return the filtered BAM path of the first sample in the first group.

    Groups come from ``cf.sampleSheetGroups``; the sample name ``s`` is mapped
    to ``filtered_bam/<s>.filtered.bam``. An empty string is returned when
    ``sampleSheet`` is not an existing file.
    """
    if not os.path.isfile(sampleSheet):
        return ""

    groups = cf.sampleSheetGroups(sampleSheet)
    first_key = list(groups)[0]
    bam_paths = ["filtered_bam/" + (name + ".filtered.bam") for name in groups[first_key]]
    return bam_paths[0]
|
||
# Writes the two files (b1.csv, b2.csv) that the rMats rule passes to
# rmats.py via --b1 / --b2, one pair per {compGroup}.
rule createInputcsv:
    input:
        # BAM index files of all samples.
        bams = expand("filtered_bam/{sample}.filtered.bam.bai", sample=samples),
        # Output of the split_sampleSheet checkpoint for this comparison group.
        sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output
    output:
        b1out = "rMats_{}/b1.csv".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
        b2out = "rMats_{}/b2.csv".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}")
    params:
        # Comma-separated filtered BAM paths of the first (b1) and second (b2)
        # group in the split sample sheet; empty if the sheet does not exist.
        b1 = lambda wildcards,input: generate_b1_b2(str(input.sampleSheet),"b1"),
        b2 = lambda wildcards,input: generate_b1_b2(str(input.sampleSheet),"b2")
    shell: """
        echo '{params.b1}' > {output.b1out}
        echo '{params.b2}' > {output.b2out}
        """
|
||
# Runs rmats.py for one {compGroup}, comparing the BAM lists in b1.csv and
# b2.csv written by createInputcsv.
rule rMats:
    input:
        b1 = "rMats_{}/b1.csv".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
        b2 = "rMats_{}/b2.csv".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
        # Output of the split_sampleSheet checkpoint for this comparison group.
        sampleSheet = lambda wildcards: checkpoints.split_sampleSheet.get(compGroup=wildcards.compGroup).output
    output:
        "rMats_{}/RI.MATS.JCEC.txt".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}")
    params:
        # First filtered BAM of the first group; used to estimate the read length.
        s1 = lambda wildcards,input: get_s1(str(input.sampleSheet)),
        # Not referenced by the shell command below, which keeps the read
        # length in the shell variable $readLen instead.
        readLen = "rMats_{}/readlength.txt".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
        gtf = genes_gtf,
        od = "rMats_{}".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"),
        end = "paired" if pairedEnd else "single",
        # Integer libraryType converted to the string form by wrap_libType().
        libType = wrap_libType(libraryType),
        tempDir = tempDir,
    log: "rMats_{}/rMats.log".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}")
    threads: 4
    conda: CONDA_RMATS_ENV
    # Shell steps:
    #  1. create a private temporary directory under params.tempDir (or /tmp);
    #  2. disable pipefail - presumably because `head` ends the pipeline early
    #     and the upstream commands would otherwise be reported as failed;
    #  3. set readLen to the mean length of column 10 (the read sequence in SAM
    #     output) over at most the first 10000 records of params.s1;
    #  4. run rmats.py, sending its stderr to the log, then delete the temp dir.
    shell:"""
        TMPDIR={params.tempDir}
        MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX);
        set +o pipefail;
        readLen=$(samtools view {params.s1} | awk \'{{print length($10)}}\' | head -10000 | awk \'{{ sum += $1 }} END {{ if (NR > 0) print sum / NR }}\')
        rmats.py --gtf {params.gtf} --b1 {input.b1} --b2 {input.b2} --od {params.od} --tmp $MYTEMP -t {params.end} --libType {params.libType} --readLength $readLen --variable-read-length --nthread {threads} --tstat {threads} 2> {log};
        rm -rf $MYTEMP
        """
Oops, something went wrong.