Merge pull request #17 from cokelaer/main

Update CI and switch branch from master to main
sequana · Aug 31, 2022 · a79eabf · a79eabf
2 parents c8d0873 + fb779f2
commit a79eabf
Show file tree

Hide file tree

Showing 13 changed files with 412 additions and 100 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -3,7 +3,8 @@ name: Tests
 on:
   push:
     branches:
-      - master
+      - main
+      - dev
   pull_request:
     branches-ignore: []
   schedule:
@@ -37,13 +38,11 @@ jobs:
       run: |
         # $CONDA is an environment variable pointing to the root of the miniconda directory
         echo $CONDA/bin >> $GITHUB_PATH
-        #conda update ruamel_yaml
 
     - name: conda
       run: |
-        conda install -c conda-forge --quiet 'mamba<0.24' python=${{ matrix.python }}
-        mamba install -c bioconda -c conda-forge --quiet  --file environment.yml 
-
+          conda install -c conda-forge -c bioconda --quiet -y python=${{ matrix.python }} bowtie 'samtools>1.7' bamtools bedtools bowtie2 fastqc subread fastp deeptools salmon star 'picard>2.20' gffread  
+          conda install -c conda-forge -y ncurses
     - name: Install dependencies
       run: |
         pip install coveralls pytest-cov pytest pytest-xdist

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
@@ -8,9 +8,9 @@ on:
 jobs:
   build-n-publish:
     name: Build and publish to PyPI and TestPyPI
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-20.04
     steps:
-    - uses: actions/checkout@master
+    - uses: actions/checkout@main
     - name: Set up Python 3.7
       uses: actions/setup-python@v1
       with:
@@ -26,14 +26,14 @@ jobs:
           python setup.py sdist
 
     - name: Publish distribution to Test PyPI
-      uses: pypa/gh-action-pypi-publish@master
+      uses: pypa/gh-action-pypi-publish@release/v1
       with:
         user: __token__
         password: ${{ secrets.TEST_PYPI_API_TOKEN }}
         repository_url: https://test.pypi.org/legacy/
     - name: Publish distribution to PyPI
       if: startsWith(github.ref, 'refs/tags')
-      uses: pypa/gh-action-pypi-publish@master
+      uses: pypa/gh-action-pypi-publish@release/v1
       with:
         user: __token__
         password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,5 +1,6 @@
 recursive-exclude * __pycache__
 recursive-exclude * *pyc
+recursive-exclude * doc/wiki
 recursive-include * *rules
 include requirements*txt
 include README.rst

diff --git a/README.rst b/README.rst
@@ -32,7 +32,7 @@ You must install Sequana first::
 
 Then, just install this package::
 
-    pip install sequana_rnaseq
+    pip install sequana_rnaseq --upgrade
 
 For all dependencies (see hereafter), you can use conda. Another experimental solution is to use damona::
 
@@ -47,8 +47,8 @@ Usage
 
 ::
 
-    sequana_pipelines_rnaseq --help
-    sequana_pipelines_rnaseq --input-directory DATAPATH --genome-directory genome --aligner star
+    sequana_rnaseq --help
+    sequana_rnaseq --input-directory DATAPATH --genome-directory genome --aligner star
 
 This creates a directory with the pipeline and configuration file. You will then need 
 to execute the pipeline::
@@ -61,7 +61,7 @@ retrieve the pipeline itself and its configuration files and then execute the pi
 
     snakemake -s rnaseq.rules -c config.yaml --cores 4 --stats stats.txt
 
-Or use `sequanix <https://sequana.readthedocs.io/en/master/sequanix.html>`_ interface.
+Or use `sequanix <https://sequana.readthedocs.io/en/main/sequanix.html>`_ interface.
 
 Requirements
 ~~~~~~~~~~~~
@@ -91,10 +91,10 @@ all dependencies for you::
     conda install -c anaconda qt pyqt>5
     pip install sequana
     pip install sequana_rnaseq
-    conda install --file https://raw.githubusercontent.com/sequana/rnaseq/master/conda.yaml
+    conda install --file https://raw.githubusercontent.com/sequana/rnaseq/main/conda.yaml
 
 
-.. image:: https://raw.githubusercontent.com/sequana/sequana_rnaseq/master/sequana_pipelines/rnaseq/dag.png
+.. image:: https://raw.githubusercontent.com/sequana/sequana_rnaseq/main/sequana_pipelines/rnaseq/dag.png
 
 
 Details
@@ -137,7 +137,7 @@ This produces a HTML repot summarizing you differential analysis.
 Rules and configuration details
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Here is the `latest documented configuration file <https://raw.githubusercontent.com/sequana/sequana_rnaseq/master/sequana_pipelines/rnaseq/config.yaml>`_
+Here is the `latest documented configuration file <https://raw.githubusercontent.com/sequana/sequana_rnaseq/main/sequana_pipelines/rnaseq/config.yaml>`_
 to be used with the pipeline. Each rule used in the pipeline may have a section in the configuration file. 
 
 
@@ -150,6 +150,17 @@ Changelog
 ========= ====================================================================
 Version   Description
 ========= ====================================================================
+0.16.0    * star, salmon, bam_coverage are now in sequana wrappers, updated 
+            the pipeline accordingly
+          * updated config file and schema to include resources inside the 
+            config file (so as to use new --profile option)
+          * set singularity images in all rules
+          * star wrappers has changed significantly to use star 
+            recommendation. To keep using previous way, a legacy option
+            is available and set to True in this version.
+          * bamCoverage renamed to bam_coverage in the config file
+          * multiqc_config removed redundant information and ordered
+            the output in a coherent way (QC and then analysis)
 0.15.2    * Fix bowtie2 rule to use new wrappers. Use wrappers in 
             add_read_group and mark_duplicates
 0.15.1    * Adapt to new bowtie2 align wrapper

diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,2 @@
-sequana>=0.12
-sequana_pipetools>=0.5.3
+sequana>=0.14.2
+sequana_pipetools>=0.9.2
diff --git a/sequana_pipelines/rnaseq/config.yaml b/sequana_pipelines/rnaseq/config.yaml
@@ -10,6 +10,13 @@
 input_directory:
 input_readtag: _R[12]_
 input_pattern: '*fastq.gz'
+
+# See sequana_pipetools.readthedocs.io for details about these 2 options
+# common prefixes are removed. additional prefixes may be removed here
+#extra_prefixes_to_strip: []
+# in special cases, sample names can be extracted with a pattern
+#sample_pattern: '{sample}.fastq.gz'
+
 # =========================================== Sections for the users
 
 #############################################################################
@@ -50,6 +57,8 @@ fastqc:
     skip_fastqc_raw: true
     options: --nogroup
     threads: 4
+    resources:
+        mem: 4G
 
 #######################################################################
 # Quality trimming and adapter removal
@@ -78,7 +87,7 @@ fastqc:
 #            of -O to 6 (at least 6 bases are required to match before
 #            trimming of an adapter)
 #
-# tool_choice__ = ["atropos", "cutadapt", "fastp"]
+# tool_choice__ = ["atropos", "cutadapt"]
 #
 # trim-n trims Ns at the end of the read
 cutadapt:
@@ -157,11 +166,27 @@ bowtie1_mapping_rna:
 # - do: if unchecked, this rule is ignored
 # - options: any options recognised by rna-star tool
 # - threads: number of threads to be used
-#
+# - legacy: if set to True will use the old 2-pass version from STAR
+#      used in this pipeline until v0.15.3. If you want to use the
+#      2-pass mode available in star, you will need star 2.7 and above
 star_mapping:
     options: --outFilterMismatchNoverLmax 0.05 --seedSearchStartLmax 20
+    legacy: True
     threads: 4
+    resources:
+      mem: 34G
 
+##############################################################################
+# STAR indexing section
+#
+# :Parameters:
+#
+# - options: string with any valid STAR options
+star_index:
+    options:
+    threads: 4
+    resources:
+      mem: 4G
 #############################################################################
 # bowtie1_mapping_ref used to align reads against genome file
 #
@@ -184,20 +209,31 @@ bowtie1_mapping_ref:
 # - options: any options recognised by bowtie2 tool
 # - threads: number of threads to be used
 #
-bowtie2:
+bowtie2_mapping:
     #options: "--dovetail --no-mixed --no-discordant " for paired-end data
     options: ''
     threads: 4
     genome_size_larger_than_4gb: false
+    resources:
+      mem: 20G
 
 bowtie2_index:
     options: ''
     threads: 4
+    resources:
+      mem: 20G
 
+salmon_index:
+    threads: 2
+    options:
+    resources:
+        mem: 4G
 
 salmon_mapping:
     options: -l A
     threads: 4
+    resources:
+        mem: 4G
 
 #############################################################################
 # feature_counts used to count reads against features
@@ -233,7 +269,7 @@ feature_counts:
     tolerance: 0.15  # use to figure out the strandness. no need to change
     feature: gene    # could be exon, mRNA, etc
     attribute: ID    # could be ID, gene_id, etc
-    extra_attributes: '' # by default, stores only the main attribute, but could add more
+    extra_attributes:    # by default, stores only the main attribute, but could add more
 
 #############################################################################
 # bamCoverage write file in bigwig format from BAM files.
@@ -269,14 +305,17 @@ feature_counts:
 #                      inclusion. A value of 0 disables filtering and is
 #                      needed for including single-end and orphan reads.
 # - threads: number of threads to be used
-coverage:
+bam_coverage:
     do: false
+    options:
     binSize: 10
     genomeSize: 2150570000  ##mm10
     extendReads: 65
     minFragmentLength: 0 #Note that a value other than 0 will exclude all single-end reads.
     maxFragmentLength: 0 #A value of 0 disables filtering and is needed for including single-end and orphan reads.
     threads: 4
+    resources:
+        mem: 20G
 
 
 ###########################################################################
@@ -308,9 +347,11 @@ mark_duplicates:
     remove: false ## may be True
     tmpdir: ./tmp/
     threads: 4
+    resources:
+      mem: 34G
 
 add_read_group:
-    options:
+    options: 
 
 #############################################################################
 # RNA-SeQC allows to compute a series of quality control metrics for RNA-seq data
@@ -324,15 +365,15 @@ add_read_group:
 # - options: any options recognised by RNA-seQC tool
 rnaseqc:
     do: false
-    gtf_file: ''
+    gtf_file:
     options: --coverage
 
 
 # if bed_file is not provided, try to create one on the fly
 # needs mark_duplicates
 rseqc:
     do: false
-    bed_file: ''
+    bed_file:
 
 
 #############################################################################
@@ -346,10 +387,10 @@ rseqc:
 # - config_file: by default, we use sequana RNA-seq multiqc_config file. 
 #       If you want your own multiqc, fill this entry
 multiqc:
-    options: "-p -f -x *_init_*"
-    modules: 
-    input_directory: "."
-    config_file: "multiqc_config.yaml" 
+    options: -p -f -x *_init_*
+    modules: ''
+    input_directory: .
+    config_file: multiqc_config.yaml
 
 
 
diff --git a/sequana_pipelines/rnaseq/main.py b/sequana_pipelines/rnaseq/main.py
@@ -248,7 +248,7 @@ def main(args=None):
 
         # ------------------------------------------------------ optional
         cfg.igvtools.do = options.do_igvtools
-        cfg.coverage.do = options.do_bam_coverage
+        cfg.bam_coverage.do = options.do_bam_coverage
         cfg.mark_duplicates.do = False
         if options.do_mark_duplicates:
             cfg.mark_duplicates.do = True

diff --git a/sequana_pipelines/rnaseq/multiqc_config.yaml b/sequana_pipelines/rnaseq/multiqc_config.yaml
@@ -122,9 +122,50 @@ sp:
     #    fn: "*coverage.tsv"
 
 # Overwrite the defaults of which table columns are visible by default
+#
+read_count_prefix: ''
+read_count_multiplier: 1
+
 table_columns_visible:
     FastQC:
         percent_fails: False
         total_sequences: True
+        percent_gc: False
+    fastp:
+        pct_duplication: False
+        after_filtering_gc_content: False
+    Bowtie 1:
+        reads_aligned_percentage: False
+        reads_aligned: False
+    picard:
+        PERCENT_DUPLICATION: False
+
+top_modules:
+  - fastqc
+  - fastp
+  - bowtie1
+  - bowtie2
+  - salmon
+  - star
+  - featureCounts
+
+module_order:
+  - fastqc
+  - fastp
+  - rseqc
+  - markduplicates
+  - picard
+  - bowtie1
+  - bowtie2
+  - salmon
+  - star
+  - featureCounts
+
+remove_sections:
+  - fastqc_status_checks
+  - fastqc_per_base_n_content
+
+
 #fastqc_config:
 #fastqc_theoretical_gc: 'mm10_genome'
+#
diff --git a/sequana_pipelines/rnaseq/requirements.txt b/sequana_pipelines/rnaseq/requirements.txt
@@ -13,3 +13,4 @@
 - fastqc
 - samtools
 - bamtools
+- bedtools