Merge pull request #13 from nservant/dev
update conda env and markdown typo
nservant committed Apr 30, 2019
2 parents 1941bb5 + 06bc08e commit 9cb7d76
Showing 12 changed files with 99 additions and 90 deletions.
23 changes: 12 additions & 11 deletions CHANGELOG.md
@@ -2,14 +2,15 @@

## v1.0dev - 2019-04-09

-First version of nf-core-hic pipeline which is a Nextflow implementation of the HiC-Pro pipeline [https://github.com/nservant/HiC-Pro].
-Note that all HiC-Pro functionalities are not yet all implemented. The current version is designed for protocols based on restriction enzyme digestion.
-
-In summary, this version allows :
-* Automatic detection and generation of annotation files based on igenomes if not provided.
-* Two-steps alignment of raw sequencing reads
-* Reads filtering and detection of valid interaction products
-* Generation of raw contact matrices for a set of resolutions
-* Normalization of the contact maps using the ICE algorithm
-* Generation of cooler file for visualization on higlass [https://higlass.io/]
-* Quality report based on HiC-Pro MultiQC module
+First version of nf-core-hic pipeline which is a Nextflow implementation of the [HiC-Pro pipeline](https://github.com/nservant/HiC-Pro/).
+Note that all HiC-Pro functionalities are not yet all implemented. The current version is designed for protocols based on restriction enzyme digestion.
+
+In summary, this version allows :
+
+* Automatic detection and generation of annotation files based on igenomes if not provided.
+* Two-steps alignment of raw sequencing reads
+* Reads filtering and detection of valid interaction products
+* Generation of raw contact matrices for a set of resolutions
+* Normalization of the contact maps using the ICE algorithm
+* Generation of cooler file for visualization on [higlass](https://higlass.io/)
+* Quality report based on HiC-Pro MultiQC module
10 changes: 7 additions & 3 deletions bin/scrape_software_versions.py
@@ -3,17 +3,21 @@
from collections import OrderedDict
import re

-# TODO nf-core: Add additional regexes for new tools in process get_software_versions
+# Add additional regexes for new tools in process get_software_versions
regexes = {
'nf-core/hic': ['v_pipeline.txt', r"(\S+)"],
'Nextflow': ['v_nextflow.txt', r"(\S+)"],
'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"],
+'Bowtie2': ['v_bowtie2.txt', r"Bowtie2 v(\S+)"],
+'Python': ['v_python.txt', r"Python v(\S+)"],
+'Samtools': ['v_samtools.txt', r"Samtools v(\S+)"],
'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"],
}
results = OrderedDict()
results['nf-core/hic'] = '<span style="color:#999999;\">N/A</span>'
results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
results['FastQC'] = '<span style="color:#999999;\">N/A</span>'
+results['Bowtie2'] = '<span style="color:#999999;\">N/A</span>'
+results['Python'] = '<span style="color:#999999;\">N/A</span>'
+results['Samtools'] = '<span style="color:#999999;\">N/A</span>'
results['MultiQC'] = '<span style="color:#999999;\">N/A</span>'

# Search each file using its regex
2 changes: 1 addition & 1 deletion conf/base.config
@@ -11,7 +11,7 @@

process {

-// TODO nf-core: Check the defaults for all processes
+// Check the defaults for all processes
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
time = { check_max( 2.h * task.attempt, 'time' ) }
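
These per-process defaults can be raised or capped per run without editing this file: the pipeline's `--max_memory`, `--max_cpus` and `--max_time` flags (documented in docs/usage.md below) bound what `check_max` will request. A sketch of such an invocation, with invented values that are not part of this commit:

```bash
# cap resource requests for a single run (illustrative values)
nextflow run nf-core/hic -profile docker \
    --max_memory '16.GB' --max_cpus 4 --max_time '48.h'
```
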
1 change: 1 addition & 0 deletions docs/configuration/local.md
@@ -10,6 +10,7 @@ Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.
First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/)

Then, simply run the analysis pipeline:

```bash
nextflow run nf-core/hic -profile docker --genome '<genome ID>'
```
3 changes: 2 additions & 1 deletion docs/configuration/reference_genomes.md
@@ -39,11 +39,12 @@ Multiple reference index types are held together with consistent structure for m
We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default.
The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon.
The pipeline will automatically download the required reference files when you run the pipeline.
-For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/
+For more information about the AWS iGenomes, see [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/)

Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource.
Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location.
For example:

```nextflow
params.igenomes_base = '/path/to/data/igenomes/'
```
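
For illustration, making that local copy might look like the following; the bucket path follows the AWS-iGenomes documentation and is assumed here, not taken from this commit:

```bash
# copy one genome build locally, then point params.igenomes_base at it
aws s3 sync s3://ngi-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/ \
    /path/to/data/igenomes/Homo_sapiens/Ensembl/GRCh37/
```
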
2 changes: 1 addition & 1 deletion docs/installation.md
@@ -74,7 +74,7 @@ Be warned of two important points about this default configuration:
#### 3.1) Software deps: Docker
First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/)

-Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/hic).
+Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from [dockerhub](https://hub.docker.com/r/nfcore/hic).

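For instance, a minimal Docker-backed run could look like this (parameters are illustrative, not part of this commit):

```bash
nextflow run nf-core/hic -profile docker --reads '*_R{1,2}.fastq.gz' --genome GRCh37
```
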
#### 3.2) Software deps: Singularity
If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative.
4 changes: 2 additions & 2 deletions docs/output.md
@@ -64,7 +64,7 @@ Short range interactions that are likely to be spurious ligation products can th

The validPairs are stored using a simple tab-delimited text format:

-```
+```bash
read name / chr_reads1 / pos_reads1 / strand_reads1 / chr_reads2 / pos_reads2 / strand_reads2 / fragment_size / res frag name R1 / res frag R2 / mapping qual R1 / mapping qual R2 [/ allele_specific_tag]
```

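For illustration, a single record in this format might look like the following line (all values invented; the trailing allele-specific tag is optional):

```bash
SRR400264.1234  chr1  155123  +  chr1  302456  -  356  HIC_chr1_42  HIC_chr1_87  42  42
```
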
@@ -102,7 +102,7 @@ A contact map is defined by :

Based on the observation that a contact map is symmetric and usually sparse, only non-zero values are stored for half of the matrix. The user can specify whether the 'upper', 'lower' or 'complete' matrix should be stored. The 'asis' option stores the contacts as they are observed in the valid pairs files.

-```
+```bash
A B 10
A C 23
B C 24
4 changes: 2 additions & 2 deletions docs/troubleshooting.md
@@ -7,11 +7,11 @@ If only no file, only one input file , or only read one and not read two is pick
1. The path must be enclosed in quotes (`'` or `"`)
2. The path must have at least one `*` wildcard character. This applies even if you are only running one paired-end sample.
3. When using the pipeline with paired-end data, the path must use `{1,2}` or `{R1,R2}` notation to specify read pairs.
4. If you are running single-end data, make sure to specify `--singleEnd`

If the pipeline can't find your files, you will get the following error:

-```
+```bash
ERROR ~ Cannot find any reads matching: *{1,2}.fastq.gz
```

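Putting the four rules together, a working invocation might look like this (paths and sample names are illustrative):

```bash
nextflow run nf-core/hic --reads 'data/sample_*_R{1,2}.fastq.gz' -profile docker
```
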
124 changes: 62 additions & 62 deletions docs/usage.md
@@ -7,69 +7,69 @@
* [Updating the pipeline](#updating-the-pipeline)
* [Reproducibility](#reproducibility)
* [Main arguments](#main-arguments)
    * [`-profile`](#-profile-single-dash)
        * [`awsbatch`](#awsbatch)
        * [`conda`](#conda)
        * [`docker`](#docker)
        * [`singularity`](#singularity)
        * [`test`](#test)
    * [`--reads`](#--reads)
    * [`--singleEnd`](#--singleend)
* [Reference genomes](#reference-genomes)
    * [`--genome`](#--genome)
    * [`--fasta`](#--fasta)
    * [`--igenomesIgnore`](#--igenomesignore)
    * [`--bwt2_index`](#--bwt2_index)
    * [`--chromosome_size`](#--chromosome_size)
    * [`--restriction_fragments`](#--restriction_fragments)
* [Hi-C specific options](#hi-c-specific-options)
    * [Reads mapping](#reads-mapping)
        * [`--bwt2_opts_end2end`](#--bwt2_opts_end2end)
        * [`--bwt2_opts_trimmed`](#--bwt2_opts_trimmed)
        * [`--min_mapq`](#--min_mapq)
    * [Digestion Hi-C](#digestion-hi-c)
        * [`--restriction_site`](#--restriction_site)
        * [`--ligation_site`](#--ligation_site)
        * [`--min_restriction_fragment_size`](#--min_restriction_fragment_size)
        * [`--max_restriction_fragment_size`](#--max_restriction_fragment_size)
        * [`--min_insert_size`](#--min_insert_size)
        * [`--max_insert_size`](#--max_insert_size)
    * [DNase Hi-C](#dnase-hi-c)
        * [`--dnase`](#--dnase)
    * [Hi-C Processing](#hi-c-processing)
        * [`--min_cis_dist`](#--min_cis_dist)
        * [`--rm_singleton`](#--rm_singleton)
        * [`--rm_dup`](#--rm_dup)
        * [`--rm_multi`](#--rm_multi)
    * [Genome-wide contact maps](#genome-wide-contact-maps)
        * [`--bins_size`](#--bins_size)
        * [`--ice_max_iter`](#--ice_max_iter)
        * [`--ice_filer_low_count_perc`](#--ice_filer_low_count_perc)
        * [`--ice_filer_high_count_perc`](#--ice_filer_high_count_perc)
        * [`--ice_eps`](#--ice_eps)
    * [Inputs/Outputs](#inputs-outputs)
        * [`--splitFastq`](#--splitFastq)
        * [`--saveReference`](#--saveReference)
        * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates)
* [Job resources](#job-resources)
    * [Automatic resubmission](#automatic-resubmission)
    * [Custom resource requests](#custom-resource-requests)
* [AWS batch specific parameters](#aws-batch-specific-parameters)
    * [`-awsbatch`](#-awsbatch)
    * [`--awsqueue`](#--awsqueue)
    * [`--awsregion`](#--awsregion)
* [Other command line parameters](#other-command-line-parameters)
    * [`--outdir`](#--outdir)
    * [`--email`](#--email)
    * [`-name`](#-name-single-dash)
    * [`-resume`](#-resume-single-dash)
    * [`-c`](#-c-single-dash)
    * [`--custom_config_version`](#--custom_config_version)
    * [`--max_memory`](#--max_memory)
    * [`--max_time`](#--max_time)
    * [`--max_cpus`](#--max_cpus)
    * [`--plaintext_email`](#--plaintext_email)
    * [`--multiqc_config`](#--multiqc_config)


## General Nextflow info
@@ -83,6 +83,7 @@ NXF_OPTS='-Xms1g -Xmx4g'

## Running the pipeline
The typical command for running the pipeline is as follows:

```bash
nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' --genome GRCh37 -profile docker
```
@@ -135,8 +136,6 @@ If `-profile` is not specified at all the pipeline will be run locally and expec
* A profile with a complete configuration for automated testing
* Includes links to test data so needs no other parameters

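In practice, the `test` profile is a quick way to sanity-check an installation; combining it with a software profile on one command line is standard Nextflow usage (example assumed, not part of this commit):

```bash
nextflow run nf-core/hic -profile test,docker
```
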
-<!-- TODO nf-core: Document required command line parameters -->

### `--reads`
Use this to specify the location of your input FastQ files. For example:

@@ -211,7 +210,8 @@ The bowtie2 indexes are required to run the Hi-C pipeline. If the `--bwt2_index`

The Hi-C pipeline also requires a two-column text file with the chromosome name and its size (tab-separated).
If not specified, this file will be automatically created by the pipeline. In the latter case, the `--fasta` reference genome has to be specified.
-```

+```bash
chr1 249250621
chr2 243199373
chr3 198022430
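# Editor's illustration (not part of the original file): one way to build this
# two-column file from the reference, assuming samtools is available:
#   samtools faidx genome.fa && cut -f1,2 genome.fa.fai > chrom_size.tsv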
@@ -233,7 +233,7 @@ If not specified, this file will be automatically created by the pipeline. In th

Finally, Hi-C experiments based on restriction enzyme digestion require a BED file with coordinates of restriction fragments.

-```
+```bash
chr1 0 16007 HIC_chr1_1 0 +
chr1 16007 24571 HIC_chr1_2 0 +
chr1 24571 27981 HIC_chr1_3 0 +
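# Editor's note (not part of the original file): HiC-Pro ships a
# digest_genome.py utility that can produce such a BED file from a genome
# fasta and a restriction site.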
@@ -445,15 +445,15 @@ The `--splitFastq` option allows to automatically split input read pairs into ch
If specified, annotation files automatically generated from the `--fasta` file are exported in the results folder. Default: false
-```
+```bash
--saveReference
```
#### `--saveAlignedIntermediates`
If specified, all intermediate mapping files are saved and exported in the results folder. Default: false
-```
+```bash
--saveAlignedIntermediates
```
6 changes: 3 additions & 3 deletions environment.yml
@@ -6,7 +6,7 @@ channels:
- bioconda
- defaults
dependencies:
-  - python=2.7.13
+  - python=2.7.16
- pip=18.1
- conda-forge::scipy=1.0.1
- conda-forge::numpy=1.9.3
@@ -15,7 +15,7 @@ dependencies:
- bioconda::pysam=0.14.1
- cooler=0.8.3
- bowtie2=2.3.5
-  - samtools=1.7
-  - multiqc=1.6
+  - samtools=1.9
+  - bioconda::multiqc=1.7
- pip:
- iced==0.4.2
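
To build this environment by hand rather than through the pipeline's `-profile conda` support, the standard Conda commands apply (the environment name is assumed from nf-core conventions, not shown in this diff):

```bash
conda env create -f environment.yml
conda activate nf-core-hic-1.0dev
```
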
8 changes: 5 additions & 3 deletions main.nf
@@ -11,7 +11,7 @@


def helpMessage() {
-// TODO nf-core: Add to this help message with new command line parameters
+// Add to this help message with new command line parameters
log.info nfcoreHeader()
log.info"""
@@ -236,6 +236,7 @@ summary['Run Name'] = custom_runName ?: workflow.runName
summary['Reads'] = params.reads
summary['splitFastq'] = params.splitFastq
summary['Fasta Ref'] = params.fasta
+summary['Restriction Motif']= params.restriction_site
summary['Ligation Motif'] = params.ligation_site
summary['DNase Mode'] = params.dnase
summary['Remove Dup'] = params.rm_dup
@@ -311,8 +312,9 @@ process get_software_versions {
echo $workflow.manifest.version > v_pipeline.txt
echo $workflow.nextflow.version > v_nextflow.txt
bowtie2 --version > v_bowtie2.txt
-python --version > v_python.txt
+python --version > v_python.txt 2>&1
samtools --version > v_samtools.txt
+multiqc --version > v_multiqc.txt
scrape_software_versions.py &> software_versions_mqc.yaml
"""
}
@@ -868,7 +870,7 @@ workflow.onComplete {
email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp

-// TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize)
+// If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize)
// On success try attach the multiqc report
def mqc_report = null
try {
2 changes: 1 addition & 1 deletion nextflow.config
@@ -9,7 +9,7 @@
params {

// Workflow flags
-// TODO nf-core: Specify your pipeline's command line flags
+// Specify your pipeline's command line flags
reads = "*{1,2}.fastq.gz"
outdir = './results'
genome = false
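
Each of these defaults can be overridden at run time on the command line; for example (values illustrative, not part of this commit):

```bash
nextflow run nf-core/hic --reads '/data/*_R{1,2}.fastq.gz' --outdir ./results --genome GRCh37
```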
