Merge pull request #141 from LaurenceKuhl/test_count
count_table as an input fix
LaurenceKuhl committed May 29, 2024
2 parents fd449d3 + af1cbe4 commit 7ff5a04
Showing 7 changed files with 104 additions and 48 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
@@ -32,6 +32,7 @@ jobs:
- "test_screening_rra"
- "test_targeted"
- "test_umis"
- "test_screening_count_table"

steps:
- name: Check out pipeline code
28 changes: 28 additions & 0 deletions conf/test_screening_count_table.config
@@ -0,0 +1,28 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/crisprseq -profile test_screening_count_table,<conda/docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test screening profile with an input count table'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Input data
count_table = params.pipelines_testdata_base_path + "crisprseq/testdata/count_table.tsv"
analysis = 'screening'
mle_design_matrix = params.pipelines_testdata_base_path + "crisprseq/testdata/design_matrix.txt"
contrasts = params.pipelines_testdata_base_path + "crisprseq/testdata/rra_contrasts.txt"

}
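
The header of the new config already documents how to exercise this profile; for reference, the first command below is that invocation with Docker chosen as the engine, and the second is an illustrative direct-parameter run of the kind this fix enables (the file names are hypothetical placeholders, not files shipped with the pipeline):

```bash
# Run the bundled count-table test profile (any supported container engine works)
nextflow run nf-core/crisprseq -profile test_screening_count_table,docker --outdir <OUTDIR>

# Illustrative equivalent with user-supplied files (hypothetical paths)
nextflow run nf-core/crisprseq -profile docker \
    --analysis screening \
    --count_table counts.tsv \
    --mle_design_matrix design_matrix.txt \
    --contrasts contrasts.txt \
    --outdir <OUTDIR>
```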
6 changes: 3 additions & 3 deletions docs/usage/targeted.md
@@ -35,7 +35,7 @@ CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,G

The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 6 columns to match those defined in the table below.

A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 3 samples, where `chr6` is single-end and has a template sequence _(this is a reduced samplesheet, please refer to the [pipeline example samplesheet](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/samplesheet_test.csv) to see the full version)_.
A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 3 samples, where `chr6` is single-end and has a template sequence _(this is a reduced samplesheet, please refer to the [pipeline example samplesheet](https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata-edition/samplesheet_test_full.csv) to see the full version)_.

```console
sample,fastq_1,fastq_2,reference,protospacer,template
@@ -50,8 +50,8 @@ chr6,chr6-61942198-61942498_R1.fastq.gz,,CAA...GGA,TTTTATGATATTTATCTTTT,TTC...CA
| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". (Optional) |
| `reference` | Reference sequence of the target region. |
| `protospacer` | Sequence of the protospacer used for CRISPR editing. Must not includ the PAM. |
| `template` | Sequence of the template used in templet-based editing experiments. (Optional) |
| `protospacer` | Sequence of the protospacer used for CRISPR editing. Must not include the PAM. |
| `template` | Sequence of the template used in template-based editing experiments. (Optional) |

An [example samplesheet](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/samplesheet_test.csv) has been provided with the pipeline.

19 changes: 10 additions & 9 deletions nextflow.config
@@ -204,15 +204,16 @@ profiles {
executor.cpus = 4
executor.memory = 8.GB
}
test { includeConfig 'conf/test_targeted.config' }
test_targeted { includeConfig 'conf/test_targeted.config' }
test_full { includeConfig 'conf/test_targeted_full.config' }
test_targeted_full { includeConfig 'conf/test_targeted_full.config' }
test_umis { includeConfig 'conf/test_umis.config' }
test_screening_full { includeConfig 'conf/test_screening_full.config' }
test_screening { includeConfig 'conf/test_screening.config' }
test_screening_paired { includeConfig 'conf/test_screening_paired.config' }
test_screening_rra { includeConfig 'conf/test_screening_rra.config' }
test { includeConfig 'conf/test_targeted.config' }
test_targeted { includeConfig 'conf/test_targeted.config' }
test_full { includeConfig 'conf/test_targeted_full.config' }
test_targeted_full { includeConfig 'conf/test_targeted_full.config' }
test_umis { includeConfig 'conf/test_umis.config' }
test_screening_full { includeConfig 'conf/test_screening_full.config' }
test_screening { includeConfig 'conf/test_screening.config' }
test_screening_paired { includeConfig 'conf/test_screening_paired.config' }
test_screening_rra { includeConfig 'conf/test_screening_rra.config' }
test_screening_count_table { includeConfig 'conf/test_screening_count_table.config' }
}

// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
81 changes: 50 additions & 31 deletions subworkflows/local/utils_nfcore_crisprseq_pipeline/main.nf
@@ -77,43 +77,62 @@ workflow PIPELINE_INITIALISATION {
//
validateInputParameters()

//
// Create channel from input file provided through params.input
//
Channel
.fromSamplesheet("input")
.multiMap {
meta, fastq_1, fastq_2, reference, protospacer, template ->
if (fastq_2) {
files = [ fastq_1, fastq_2 ]
} else {
files = [ fastq_1 ]
}
reads_targeted: [ meta.id, meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], files ]
reads_screening:[ meta + [ single_end:fastq_2?false:true ], files ]
reference: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], reference]
protospacer: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], protospacer]
template: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], template]
}
.set { ch_input }


reads_targeted = Channel.empty()
reads_screening = Channel.empty()
fastqc_screening = Channel.empty()
reference = Channel.empty()
protospacer = Channel.empty()
template = Channel.empty()
versions = Channel.empty()

//
// Validate input samplesheet
// Create channel from input file provided through params.input
//
ch_input.reads_targeted
.groupTuple()
.map {
validateInputSamplesheet(it)
if(params.input) {
Channel
.fromSamplesheet("input")
.multiMap {
meta, fastq_1, fastq_2, reference, protospacer, template ->
if (fastq_2) {
files = [ fastq_1, fastq_2 ]
} else {
files = [ fastq_1 ]
}
reads_targeted: [ meta.id, meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], files ]
reads_screening:[ meta + [ single_end:fastq_2?false:true ], files ]
reference: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], reference]
protospacer: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], protospacer]
template: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], template]
}
.set { ch_input }

//
// Validate input samplesheet
//
ch_input.reads_targeted
.groupTuple()
.map {
validateInputSamplesheet(it)
}
.set { reads_targeted }

fastqc_screening = ch_input.reads_screening
reference = ch_input.reference
protospacer = ch_input.protospacer
template = ch_input.template
} else {
ch_input = Channel.empty()
}
.set { reads_targeted }

emit:
reads_targeted = reads_targeted
fastqc_screening = ch_input.reads_screening
reference = ch_input.reference
protospacer = ch_input.protospacer
template = ch_input.template
versions = ch_versions
reads_targeted
fastqc_screening
reference
protospacer
template
versions = ch_versions
}

/*
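
The net effect of this change is that the samplesheet-derived channels are only built when `params.input` is set; otherwise they stay empty, so a screening run driven purely by `--count_table` no longer requires a FASTQ samplesheet. A minimal sketch of the guard pattern (simplified, not the pipeline's exact code):

```nextflow
// Simplified sketch: channels default to empty and are only populated when a
// samplesheet is provided, so count-table-only runs skip FASTQ handling.
reads_targeted = Channel.empty()

if (params.input) {
    reads_targeted = Channel
        .fromSamplesheet("input")            // nf-validation samplesheet parser
        .map { meta, fastq_1, fastq_2, reference, protospacer, template ->
            [ meta.id, meta, fastq_2 ? [ fastq_1, fastq_2 ] : [ fastq_1 ] ]
        }
}
```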
7 changes: 6 additions & 1 deletion templates/template_fluteMLE.R
@@ -20,7 +20,12 @@
FluteMLE(mle, treatname= i, proj=i, pathview.top=5)
}
} else {
FluteMLE(mle, treatname= "${prefix}", proj="${prefix}", ${args}, pathview.top=5)
beta_strings <- grep("\\\\.beta", colnames(mle), value = TRUE)
before_beta <- sub("\\\\.beta.*", "", beta_strings)
unique_strings <- unique(before_beta)
for(i in unique_strings) {
FluteMLE(mle, treatname= i, proj=i, ${args}, pathview.top=5)
}
}

version_file_path <- "versions.yml"
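
The new `else` branch now mirrors the `if` branch above: instead of a single `FluteMLE()` call it derives treatment names from the `.beta` columns of the MAGeCK MLE gene summary and runs `FluteMLE()` once per treatment (the quadruple backslashes are presumably escapes for Nextflow's template interpolation; in plain R the pattern is `\\.beta`). An illustrative walk-through with hypothetical column names:

```r
# Hypothetical MAGeCK MLE gene-summary columns, for illustration only
cols <- c("Gene", "sgRNA", "treatmentA.beta", "treatmentA.z", "treatmentB.beta")

beta_strings   <- grep("\\.beta", cols, value = TRUE)  # "treatmentA.beta", "treatmentB.beta"
before_beta    <- sub("\\.beta.*", "", beta_strings)   # "treatmentA", "treatmentB"
unique_strings <- unique(before_beta)                   # one FluteMLE() run per treatment name
```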
10 changes: 6 additions & 4 deletions workflows/crisprseq_screening.nf
@@ -11,6 +11,8 @@ include { BAGEL2_PR } from '../modules/local/
include { BAGEL2_GRAPH } from '../modules/local/bagel2/graph'
include { MATRICESCREATION } from '../modules/local/matricescreation'
include { MAGECK_FLUTEMLE } from '../modules/local/mageck/flutemle'
include { MAGECK_FLUTEMLE as MAGECK_FLUTEMLE_CONTRASTS } from '../modules/local/mageck/flutemle'
include { MAGECK_FLUTEMLE as MAGECK_FLUTEMLE_DAY0 } from '../modules/local/mageck/flutemle'
include { VENNDIAGRAM } from '../modules/local/venndiagram'
// nf-core modules
include { FASTQC } from '../modules/nf-core/fastqc/main'
@@ -277,8 +279,8 @@ workflow CRISPRSEQ_SCREENING {
ch_mle = MATRICESCREATION.out.design_matrix.combine(ch_counts)
MAGECK_MLE (ch_mle)
ch_versions = ch_versions.mix(MAGECK_MLE.out.versions)
MAGECK_FLUTEMLE(MAGECK_MLE.out.gene_summary)
ch_versions = ch_versions.mix(MAGECK_FLUTEMLE.out.versions)
MAGECK_FLUTEMLE_CONTRASTS(MAGECK_MLE.out.gene_summary)
ch_versions = ch_versions.mix(MAGECK_FLUTEMLE_CONTRASTS.out.versions)
ch_venndiagram = BAGEL2_PR.out.pr.join(MAGECK_MLE.out.gene_summary)
VENNDIAGRAM(ch_venndiagram)
ch_versions = ch_versions.mix(VENNDIAGRAM.out.versions)
@@ -287,8 +289,8 @@
ch_mle = Channel.of([id: "day0"]).merge(Channel.of([[]])).merge(ch_counts)
MAGECK_MLE_DAY0 (ch_mle)
ch_versions = ch_versions.mix(MAGECK_MLE_DAY0.out.versions)
MAGECK_FLUTEMLE(MAGECK_MLE_DAY0.out.gene_summary)
ch_versions = ch_versions.mix(MAGECK_FLUTEMLE.out.versions)
MAGECK_FLUTEMLE_DAY0(MAGECK_MLE_DAY0.out.gene_summary)
ch_versions = ch_versions.mix(MAGECK_FLUTEMLE_DAY0.out.versions)
}
}

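The two new `include ... as ...` aliases are needed because a DSL2 module can only be invoked once per workflow scope under a given name; aliasing the FluteMLE module lets the contrasts branch and the day-0 branch each call their own copy. A minimal sketch of the pattern, simplified and outside the pipeline's actual context:

```nextflow
// Import the same local module twice under different names...
include { MAGECK_FLUTEMLE as MAGECK_FLUTEMLE_CONTRASTS } from '../modules/local/mageck/flutemle'
include { MAGECK_FLUTEMLE as MAGECK_FLUTEMLE_DAY0      } from '../modules/local/mageck/flutemle'

// ...so each branch of the workflow can invoke it independently.
MAGECK_FLUTEMLE_CONTRASTS(MAGECK_MLE.out.gene_summary)
MAGECK_FLUTEMLE_DAY0(MAGECK_MLE_DAY0.out.gene_summary)
```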
