Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deseq2 spike handling #2534

Merged
merged 14 commits into from
Nov 23, 2022
1 change: 1 addition & 0 deletions modules/nf-core/deseq2/differential/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ process DESEQ2_DIFFERENTIAL {

input:
tuple val(meta), path(samplesheet), path(counts)
tuple val(control_genes_meta), path(control_genes_file)

output:
tuple val(meta), path("*.deseq2.results.tsv") , emit: results
Expand Down
8 changes: 8 additions & 0 deletions modules/nf-core/deseq2/differential/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ input:
calls at the pipeline level e.g. [ variable:'treatment', reference:'treated',
control:'saline', blocking:'' ] passed in as ext.args like: '--reference_level
$meta.reference --treatment_level $meta.target'
- control_genes:
type: file
description: |
Text file listing control genes, one per line
- control_genes_meta:
type: file
description: |
Meta map describing control genes, e.g. [ id: 'ERCC' ]

output:
- results:
Expand Down
14 changes: 14 additions & 0 deletions modules/nf-core/deseq2/differential/templates/deseq_de.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ opt <- list(
reference_level = NULL,
treatment_level = NULL,
blocking_variables = NULL,
control_genes_file = '$control_genes_file',
sizefactors_from_controls = FALSE,
gene_id_col = "gene_id",
sample_id_col = "experiment_accession",
test = "Wald",
Expand Down Expand Up @@ -268,12 +270,24 @@ model <- paste(model, opt\$contrast_variable, sep = ' + ')
################################################
################################################

if (opt\$control_genes_file != ''){
control_genes <- readLines(opt\$control_genes_file)
if (! opt\$sizefactors_from_controls){
count.table <- count.table[setdiff(rownames(count.table), control_genes),]
}
}

dds <- DESeqDataSetFromMatrix(
countData = round(count.table),
colData = sample.sheet,
design = as.formula(model)
)

if (opt\$control_genes_file != '' && opt\$sizefactors_from_controls){
print(paste('Estimating size factors using', length(control_genes), 'control genes'))
dds <- estimateSizeFactors(dds, controlGenes=rownames(count.table) %in% control_genes)
}

dds <- DESeq(
dds,
test = opt\$test,
Expand Down
94 changes: 91 additions & 3 deletions tests/modules/nf-core/deseq2/differential/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,27 @@ process tsv_to_csv {

}

// Take the first 50 genes and pretend they're spikes for testing control gene
// functionality

process spoof_spikes {

input:
path expression_matrix

output:
path 'spikes.txt'

script:
"""
head -n 50 $expression_matrix | \
tail -n +2 | \
awk '{print \$1}' > spikes.txt.tmp
mv spikes.txt.tmp spikes.txt
"""
}
empty_spikes = [[],[]]

workflow test_deseq2_differential {

expression_sample_sheet = file(params.test_data['mus_musculus']['genome']['rnaseq_samplesheet'], checkIfExists: true)
Expand All @@ -36,7 +57,72 @@ workflow test_deseq2_differential {
}

DESEQ2_DIFFERENTIAL (
input
input,
empty_spikes
)
}

// Try with spikes as control genes

workflow test_deseq2_differential_spikes {

expression_sample_sheet = file(params.test_data['mus_musculus']['genome']['rnaseq_samplesheet'], checkIfExists: true)
expression_matrix_file = file(params.test_data['mus_musculus']['genome']['rnaseq_matrix'], checkIfExists: true)
expression_contrasts = file(params.test_data['mus_musculus']['genome']['rnaseq_contrasts'], checkIfExists: true)

Channel.fromPath(expression_contrasts)
.splitCsv ( header:true, sep:',' )
.map{
tuple(it, expression_sample_sheet, expression_matrix_file)
}
.set{
input
}

// Make our fake spikes and pretend they're ERCC controls

spoof_spikes(expression_matrix_file)
.map{
tuple(['id':'ERCC'], it)
}.set{
ch_spikes
}

DESEQ2_DIFFERENTIAL (
input,
ch_spikes
)
}

// Try with spikes as control genes, but stripping rather than using

workflow test_deseq2_differential_strip_spikes {

expression_sample_sheet = file(params.test_data['mus_musculus']['genome']['rnaseq_samplesheet'], checkIfExists: true)
expression_matrix_file = file(params.test_data['mus_musculus']['genome']['rnaseq_matrix'], checkIfExists: true)
expression_contrasts = file(params.test_data['mus_musculus']['genome']['rnaseq_contrasts'], checkIfExists: true)

Channel.fromPath(expression_contrasts)
.splitCsv ( header:true, sep:',' )
.map{
tuple(it, expression_sample_sheet, expression_matrix_file)
}
.set{
input
}

// Make our fake spikes and pretend they're ERCC controls

spoof_spikes(expression_matrix_file)
.map{
tuple(['id':'ERCC'], it)
}.set{
ch_spikes
}

DESEQ2_DIFFERENTIAL (
input,
ch_spikes
)
}

Expand All @@ -58,7 +144,8 @@ workflow test_deseq2_differential_csv {
.splitCsv ( header:true, sep:',' )

DESEQ2_DIFFERENTIAL (
ch_contrasts.combine(ch_samples_and_matrix)
ch_contrasts.combine(ch_samples_and_matrix),
empty_spikes
)
}

Expand All @@ -80,6 +167,7 @@ workflow test_deseq2_differential_vst_nsub {
}

DESEQ2_DIFFERENTIAL (
input
input,
empty_spikes
)
}
6 changes: 6 additions & 0 deletions tests/modules/nf-core/deseq2/differential/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ process {
withName: 'test_deseq2_differential:DESEQ2_DIFFERENTIAL' {
ext.args = { "--contrast_variable $meta.variable --reference_level $meta.reference --treatment_level $meta.target --blocking_variables $meta.blocking --vs_method rlog" }
}
withName: 'test_deseq2_differential_spikes:DESEQ2_DIFFERENTIAL' {
ext.args = { "--sizefactors_from_controls TRUE --contrast_variable $meta.variable --reference_level $meta.reference --treatment_level $meta.target --blocking_variables $meta.blocking --vs_method rlog" }
}
withName: 'test_deseq2_differential_strip_spikes:DESEQ2_DIFFERENTIAL' {
ext.args = { "--contrast_variable $meta.variable --reference_level $meta.reference --treatment_level $meta.target --blocking_variables $meta.blocking --vs_method rlog" }
}
withName: 'test_deseq2_differential_csv:DESEQ2_DIFFERENTIAL' {
ext.args = { "--contrast_variable $meta.variable --reference_level $meta.reference --treatment_level $meta.target --blocking_variables $meta.blocking --vs_method rlog" }
}
Expand Down
50 changes: 50 additions & 0 deletions tests/modules/nf-core/deseq2/differential/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,56 @@
- path: output/deseq2/treatment-mCherry-hND6.rlog.tsv
md5sum: 1077cf03a8c6a155932eaaed7415b74c

- name: deseq2 differential test_deseq2_differential_spikes
command: nextflow run ./tests/modules/nf-core/deseq2/differential -entry test_deseq2_differential_spikes -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/deseq2/differential/nextflow.config
tags:
- deseq2
- deseq2/differential
files:
- path: output/deseq2/treatment-mCherry-hND6-sample_number.deseq2.results.tsv
md5sum: 150a32eef482244b47ddc7f7ffb1ad2a
- path: output/deseq2/treatment-mCherry-hND6-sample_number.deseq2.sizefactors.tsv
md5sum: 1708374146b4e8c28fbc78fa292102cd
- path: output/deseq2/treatment-mCherry-hND6-sample_number.normalised_counts.tsv
md5sum: 1c9c7ee4ca90ff0d91ae16dbfd64fdc9
- path: output/deseq2/treatment-mCherry-hND6-sample_number.rlog.tsv
md5sum: c28300837df9ca6adeba2a52ea5e181e
- path: output/deseq2/treatment-mCherry-hND6.deseq2.results.tsv
md5sum: 88b3230c5a4ad8717c1d0b38c824f1fc
- path: output/deseq2/treatment-mCherry-hND6.deseq2.sizefactors.tsv
md5sum: 1708374146b4e8c28fbc78fa292102cd
- path: output/deseq2/treatment-mCherry-hND6.normalised_counts.tsv
md5sum: 1c9c7ee4ca90ff0d91ae16dbfd64fdc9
- path: output/deseq2/treatment-mCherry-hND6.rlog.tsv
md5sum: c28300837df9ca6adeba2a52ea5e181e
- path: output/spoof/spikes.txt
md5sum: 8bd8a80b4616fc0a68f69bee6869c8bc

- name: deseq2 differential test_deseq2_differential_strip_spikes
command: nextflow run ./tests/modules/nf-core/deseq2/differential -entry test_deseq2_differential_strip_spikes -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/deseq2/differential/nextflow.config
tags:
- deseq2
- deseq2/differential
files:
- path: output/deseq2/treatment-mCherry-hND6-sample_number.deseq2.results.tsv
md5sum: 23de450bda331e7b9330dfd66042a309
- path: output/deseq2/treatment-mCherry-hND6-sample_number.deseq2.sizefactors.tsv
md5sum: a526a1bb5cb270f5a4a1641fefd289b3
- path: output/deseq2/treatment-mCherry-hND6-sample_number.normalised_counts.tsv
md5sum: bb916c41f91b2ddcc9607619a023ef44
- path: output/deseq2/treatment-mCherry-hND6-sample_number.rlog.tsv
md5sum: 1660bd7529a2f607cd8e71820ea26310
- path: output/deseq2/treatment-mCherry-hND6.deseq2.results.tsv
md5sum: 7a385151de48430e592dbcc72c0d0d75
- path: output/deseq2/treatment-mCherry-hND6.deseq2.sizefactors.tsv
md5sum: a526a1bb5cb270f5a4a1641fefd289b3
- path: output/deseq2/treatment-mCherry-hND6.normalised_counts.tsv
md5sum: bb916c41f91b2ddcc9607619a023ef44
- path: output/deseq2/treatment-mCherry-hND6.rlog.tsv
md5sum: 1660bd7529a2f607cd8e71820ea26310
- path: output/spoof/spikes.txt
md5sum: 8bd8a80b4616fc0a68f69bee6869c8bc

- name: deseq2 differential test_deseq2_differential_csv
command: nextflow run ./tests/modules/nf-core/deseq2/differential -entry test_deseq2_differential_csv -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/deseq2/differential/nextflow.config
tags:
Expand Down