From 15e501e72df4783509e8a9c2ef35177b8e711c7d Mon Sep 17 00:00:00 2001
From: nservant <nservant@curie.fr>
Date: Tue, 30 Apr 2019 11:53:32 +0200
Subject: [PATCH 1/2] fix markdown

---
 CHANGELOG.md                            |  23 ++---
 bin/scrape_software_versions.py         |  10 +-
 conf/base.config                        |   2 +-
 docs/configuration/local.md             |   1 +
 docs/configuration/reference_genomes.md |   3 +-
 docs/installation.md                    |   2 +-
 docs/output.md                          |   4 +-
 docs/troubleshooting.md                 |   4 +-
 docs/usage.md                           | 124 ++++++++++++------------
 main.nf                                 |   4 +-
 nextflow.config                         |   2 +-
 11 files changed, 93 insertions(+), 86 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2539792..b982a57 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,14 +2,15 @@
 
 ## v1.0dev - 2019-04-09
 
-	First version of nf-core-hic pipeline which is a Nextflow implementation of the HiC-Pro pipeline [https://github.com/nservant/HiC-Pro].
-	Note that all HiC-Pro functionalities are not yet all implemented. The current version is designed for protocols based on restriction enzyme digestion.
-
-	In summary, this version allows :
-	* Automatic detection and generation of annotation files based on igenomes if not provided.
-	* Two-steps alignment of raw sequencing reads
-	* Reads filtering and detection of valid interaction products
-	* Generation of raw contact matrices for a set of resolutions
-	* Normalization of the contact maps using the ICE algorithm
-	* Generation of cooler file for visualization on higlass [https://higlass.io/]
-	* Quality report based on HiC-Pro MultiQC module
+First version of nf-core-hic pipeline which is a Nextflow implementation of the [HiC-Pro pipeline](https://github.com/nservant/HiC-Pro/).
+Note that not all HiC-Pro functionalities are implemented yet. The current version is designed for protocols based on restriction enzyme digestion.
+
+In summary, this version allows :
+
+* Automatic detection and generation of annotation files based on igenomes if not provided.
+* Two-step alignment of raw sequencing reads
+* Read filtering and detection of valid interaction products
+* Generation of raw contact matrices for a set of resolutions
+* Normalization of the contact maps using the ICE algorithm
+* Generation of cooler file for visualization on [higlass](https://higlass.io/)
+* Quality report based on HiC-Pro MultiQC module
diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index 8cf977c..7a38fee 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -3,17 +3,21 @@
 from collections import OrderedDict
 import re
 
-# TODO nf-core: Add additional regexes for new tools in process get_software_versions
+# Add regexes for new tools in process get_software_versions (must match each tool's real --version output)
 regexes = {
     'nf-core/hic': ['v_pipeline.txt', r"(\S+)"],
     'Nextflow': ['v_nextflow.txt', r"(\S+)"],
-    'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"],
+    'Bowtie2': ['v_bowtie2.txt', r"bowtie2-align-s version (\S+)"],
+    'Python': ['v_python.txt', r"Python (\S+)"],
+    'Samtools': ['v_samtools.txt', r"samtools (\S+)"],
     'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"],
 }
 results = OrderedDict()
 results['nf-core/hic'] = '<span style="color:#999999;\">N/A</span>'
 results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
-results['FastQC'] = '<span style="color:#999999;\">N/A</span>'
+results['Bowtie2'] = '<span style="color:#999999;\">N/A</span>'
+results['Python'] = '<span style="color:#999999;\">N/A</span>'
+results['Samtools'] = '<span style="color:#999999;\">N/A</span>'
 results['MultiQC'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex
diff --git a/conf/base.config b/conf/base.config
index 156fa28..28b4679 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -11,7 +11,7 @@
 
 process {
 
-  // TODO nf-core: Check the defaults for all processes
+  // Check the defaults for all processes
   cpus = { check_max( 1 * task.attempt, 'cpus' ) }
   memory = { check_max( 8.GB * task.attempt, 'memory' ) }
   time = { check_max( 2.h * task.attempt, 'time' ) }
diff --git a/docs/configuration/local.md b/docs/configuration/local.md
index 9cd485e..d4530fa 100644
--- a/docs/configuration/local.md
+++ b/docs/configuration/local.md
@@ -10,6 +10,7 @@ Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.
 First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/)
 
 Then, simply run the analysis pipeline:
+
 ```bash
 nextflow run nf-core/hic -profile docker --genome '<genome ID>'
 ```
diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md
index 1fafa8f..c52faf8 100644
--- a/docs/configuration/reference_genomes.md
+++ b/docs/configuration/reference_genomes.md
@@ -39,11 +39,12 @@ Multiple reference index types are held together with consistent structure for m
 We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default.
 The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon.
 The pipeline will automatically download the required reference files when you run the pipeline.
-For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/
+For more information about the AWS iGenomes, see [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/).
 
 Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource.
 Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location.
 For example:
+
 ```nextflow
 params.igenomes_base = '/path/to/data/igenomes/'
 ```
diff --git a/docs/installation.md b/docs/installation.md
index 70c4a6d..9ac66d5 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -74,7 +74,7 @@ Be warned of two important points about this default configuration:
 #### 3.1) Software deps: Docker
 First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/)
 
-Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/hic).
+Then, running the pipeline with the option `-profile docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from [dockerhub](https://hub.docker.com/r/nfcore/hic).
 
 #### 3.1) Software deps: Singularity
 If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative.
diff --git a/docs/output.md b/docs/output.md
index f395dcd..53c9c0c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -64,7 +64,7 @@ Short range interactions that are likely to be spurious ligation products can th
 
 The validPairs are stored using a simple tab-delimited text format ;
 
-```
+```text
 read name / chr_reads1 / pos_reads1 / strand_reads1 / chr_reads2 / pos_reads2 / strand_reads2 / fragment_size / res frag name R1 / res frag R2 / mapping qual R1 / mapping qual R2 [/ allele_specific_tag]
 ```
 
@@ -102,7 +102,7 @@ A contact map is defined by :
 
 Based on the observation that a contact map is symmetric and usually sparse, only non-zero values are stored for half of the matrix. The user can specified if the 'upper', 'lower' or 'complete' matrix has to be stored. The 'asis' option allows to store the contacts as they are observed from the valid pairs files.
 
-```
+```text
    A   B   10
    A   C   23
    B   C   24
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index e6772eb..e0f2d07 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -7,11 +7,11 @@ If only no file, only one input file , or only read one and not read two is pick
 1. The path must be enclosed in quotes (`'` or `"`)
 2. The path must have at least one `*` wildcard character. This is even if you are only running one paired end sample.
 3. When using the pipeline with paired end data, the path must use `{1,2}` or `{R1,R2}` notation to specify read pairs.
-4.  If you are running Single end data make sure to specify `--singleEnd`
+4. If you are running Single end data make sure to specify `--singleEnd`
 
 If the pipeline can't find your files then you will get the following error
 
-```
+```text
 ERROR ~ Cannot find any reads matching: *{1,2}.fastq.gz
 ```
 
diff --git a/docs/usage.md b/docs/usage.md
index 4f6825e..9b2bb6a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -7,69 +7,69 @@
 * [Updating the pipeline](#updating-the-pipeline)
 * [Reproducibility](#reproducibility)
 * [Main arguments](#main-arguments)
-    * [`-profile`](#-profile-single-dash)
-        * [`awsbatch`](#awsbatch)
-        * [`conda`](#conda)
-        * [`docker`](#docker)
-        * [`singularity`](#singularity)
-        * [`test`](#test)
-    * [`--reads`](#--reads)
-    * [`--singleEnd`](#--singleend)
+  * [`-profile`](#-profile-single-dash)
+    * [`awsbatch`](#awsbatch)
+    * [`conda`](#conda)
+    * [`docker`](#docker)
+    * [`singularity`](#singularity)
+    * [`test`](#test)
+  * [`--reads`](#--reads)
+  * [`--singleEnd`](#--singleend)
 * [Reference genomes](#reference-genomes)
-    * [`--genome`](#--genome)
-    * [`--fasta`](#--fasta)
-    * [`--igenomesIgnore`](#--igenomesignore)
-    * [`--bwt2_index`](#--bwt2_index)
-    * [`--chromosome_size`](#--chromosome_size)
-    * [`--restriction_fragments`](#--restriction_fragments)
+  * [`--genome`](#--genome)
+  * [`--fasta`](#--fasta)
+  * [`--igenomesIgnore`](#--igenomesignore)
+  * [`--bwt2_index`](#--bwt2_index)
+  * [`--chromosome_size`](#--chromosome_size)
+  * [`--restriction_fragments`](#--restriction_fragments)
 * [Hi-C specific options](#hi-c-specific-options)
-    * [Reads mapping](#reads-mapping)
-        * [`--bwt2_opts_end2end`](#--bwt2_opts_end2end)
-        * [`--bwt2_opts_trimmed`](#--bwt2_opts_trimmed)
-        * [`--min_mapq`](#--min_mapq)
-    * [Digestion Hi-C](#digestion-hi-c)
-        * [`--restriction_site`](#--restriction_site)
-        * [`--ligation_site`](#--ligation_site)
-        * [`--min_restriction_fragment_size`](#--min_restriction_fragment_size)
-        * [`--max_restriction_fragment_size`](#--max_restriction_fragment_size)
-        * [`--min_insert_size`](#--min_insert_size)
-        * [`--max_insert_size`](#--max_insert_size)
-   * [DNase Hi-C](#dnase-hi-c)
-        * [`--dnase`](#--dnase)
-   * [Hi-C Processing](#hi-c-processing)
-        * [`--min_cis_dist`](#--min_cis_dist)
-        * [`--rm_singleton`](#--rm_singleton)
-        * [`--rm_dup`](#--rm_dup)
-        * [`--rm_multi`](#--rm_multi)
-    * [Genome-wide contact maps](#genome-wide-contact-maps)
-        * [`--bins_size`](#--bins_size)
-        * [`--ice_max_iter`](#--ice_max_iter)
-        * [`--ice_filer_low_count_perc`](#--ice_filer_low_count_perc)
-        * [`--ice_filer_high_count_perc`](#--ice_filer_high_count_perc)
-        * [`--ice_eps`](#--ice_eps)
-    * [Inputs/Outputs](#inputs-outputs)
-        * [`--splitFastq`](#--splitFastq)
-        * [`--saveReference`](#--saveReference)
-        * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates)
+  * [Reads mapping](#reads-mapping)
+    * [`--bwt2_opts_end2end`](#--bwt2_opts_end2end)
+    * [`--bwt2_opts_trimmed`](#--bwt2_opts_trimmed)
+    * [`--min_mapq`](#--min_mapq)
+  * [Digestion Hi-C](#digestion-hi-c)
+    * [`--restriction_site`](#--restriction_site)
+    * [`--ligation_site`](#--ligation_site)
+    * [`--min_restriction_fragment_size`](#--min_restriction_fragment_size)
+    * [`--max_restriction_fragment_size`](#--max_restriction_fragment_size)
+    * [`--min_insert_size`](#--min_insert_size)
+    * [`--max_insert_size`](#--max_insert_size)
+  * [DNase Hi-C](#dnase-hi-c)
+    * [`--dnase`](#--dnase)
+  * [Hi-C Processing](#hi-c-processing)
+    * [`--min_cis_dist`](#--min_cis_dist)
+    * [`--rm_singleton`](#--rm_singleton)
+    * [`--rm_dup`](#--rm_dup)
+    * [`--rm_multi`](#--rm_multi)
+  * [Genome-wide contact maps](#genome-wide-contact-maps)
+    * [`--bins_size`](#--bins_size)
+    * [`--ice_max_iter`](#--ice_max_iter)
+    * [`--ice_filer_low_count_perc`](#--ice_filer_low_count_perc)
+    * [`--ice_filer_high_count_perc`](#--ice_filer_high_count_perc)
+    * [`--ice_eps`](#--ice_eps)
+  * [Inputs/Outputs](#inputs-outputs)
+    * [`--splitFastq`](#--splitFastq)
+    * [`--saveReference`](#--saveReference)
+    * [`--saveAlignedIntermediates`](#--saveAlignedIntermediates)
 * [Job resources](#job-resources)
 * [Automatic resubmission](#automatic-resubmission)
 * [Custom resource requests](#custom-resource-requests)
 * [AWS batch specific parameters](#aws-batch-specific-parameters)
-    * [`-awsbatch`](#-awsbatch)
-    * [`--awsqueue`](#--awsqueue)
-    * [`--awsregion`](#--awsregion)
+  * [`-awsbatch`](#-awsbatch)
+  * [`--awsqueue`](#--awsqueue)
+  * [`--awsregion`](#--awsregion)
 * [Other command line parameters](#other-command-line-parameters)
-    * [`--outdir`](#--outdir)
-    * [`--email`](#--email)
-    * [`-name`](#-name-single-dash)
-    * [`-resume`](#-resume-single-dash)
-    * [`-c`](#-c-single-dash)
-    * [`--custom_config_version`](#--custom_config_version)
-    * [`--max_memory`](#--max_memory)
-    * [`--max_time`](#--max_time)
-    * [`--max_cpus`](#--max_cpus)
-    * [`--plaintext_email`](#--plaintext_email)
-    * [`--multiqc_config`](#--multiqc_config)
+  * [`--outdir`](#--outdir)
+  * [`--email`](#--email)
+  * [`-name`](#-name-single-dash)
+  * [`-resume`](#-resume-single-dash)
+  * [`-c`](#-c-single-dash)
+  * [`--custom_config_version`](#--custom_config_version)
+  * [`--max_memory`](#--max_memory)
+  * [`--max_time`](#--max_time)
+  * [`--max_cpus`](#--max_cpus)
+  * [`--plaintext_email`](#--plaintext_email)
+  * [`--multiqc_config`](#--multiqc_config)
 
 
 ## General Nextflow info
@@ -83,6 +83,7 @@ NXF_OPTS='-Xms1g -Xmx4g'
 
 ## Running the pipeline
 The typical command for running the pipeline is as follows:
+
 ```bash
 nextflow run nf-core/hic --reads '*_R{1,2}.fastq.gz' -genome GRCh37 -profile docker
 ```
@@ -135,8 +136,6 @@ If `-profile` is not specified at all the pipeline will be run locally and expec
   * A profile with a complete configuration for automated testing
   * Includes links to test data so needs no other parameters
 
-<!-- TODO nf-core: Document required command line parameters -->
-
 ### `--reads`
 Use this to specify the location of your input FastQ files. For example:
 
@@ -211,7 +210,8 @@ The bowtie2 indexes are required to run the Hi-C pipeline. If the `--bwt2_index`
 
 The Hi-C pipeline will also requires a two-columns text file with the chromosome name and its size (tab separated).
 If not specified, this file will be automatically created by the pipeline. In the latter case, the `--fasta` reference genome has to be specified.
-```
+
+```text
    chr1    249250621
    chr2    243199373
    chr3    198022430
@@ -233,7 +233,7 @@ If not specified, this file will be automatically created by the pipeline. In th
 
 Finally, Hi-C experiments based on restriction enzyme digestion requires a BED file with coordinates of restriction fragments.
 
-```
+```text
    chr1   0       16007   HIC_chr1_1    0   +
    chr1   16007   24571   HIC_chr1_2    0   +
    chr1   24571   27981   HIC_chr1_3    0   +
@@ -445,7 +445,7 @@ The `--splitFastq` option allows to automatically split input read pairs into ch
 
 If specified, annotation files automatically generated from the `--fasta` file are exported in the results folder. Default: false
 
-```
+```bash
 --saveReference
 ```
 
@@ -453,7 +453,7 @@ If specified, annotation files automatically generated from the `--fasta` file a
 
 If specified, all intermediate mapping files are saved and exported in the results folder. Default: false
 
-```
+```bash
 --saveReference
 ```
 
diff --git a/main.nf b/main.nf
index bccbb0d..17ff4d3 100644
--- a/main.nf
+++ b/main.nf
@@ -11,7 +11,7 @@
 
 
 def helpMessage() {
-    // TODO nf-core: Add to this help message with new command line parameters
+    // Add to this help message with new command line parameters
     log.info nfcoreHeader()
     log.info"""
 
@@ -868,7 +868,7 @@ workflow.onComplete {
     email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
     email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
 
-    // TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize)
+    // If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize)
     // On success try attach the multiqc report
     def mqc_report = null
     try {
diff --git a/nextflow.config b/nextflow.config
index a526e9e..eba5139 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -9,7 +9,7 @@
 params {
 
   // Workflow flags
-  // TODO nf-core: Specify your pipeline's command line flags
+  // Specify your pipeline's command line flags
   reads = "*{1,2}.fastq.gz"
   outdir = './results'
   genome = false

From 06bc08eaaa34994269e4881399e2ca4e944c9226 Mon Sep 17 00:00:00 2001
From: nservant <nservant@curie.fr>
Date: Tue, 30 Apr 2019 14:57:40 +0200
Subject: [PATCH 2/2] fix conda env

---
 environment.yml | 6 +++---
 main.nf         | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/environment.yml b/environment.yml
index ed47c8b..745fdbc 100644
--- a/environment.yml
+++ b/environment.yml
@@ -6,7 +6,7 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - python=2.7.13
+  - python=2.7.16
   - pip=18.1
   - conda-forge::scipy=1.0.1
   - conda-forge::numpy=1.9.3
@@ -15,7 +15,7 @@ dependencies:
   - bioconda::pysam=0.14.1
   - cooler=0.8.3
   - bowtie2=2.3.5
-  - samtools=1.7
-  - multiqc=1.6
+  - samtools=1.9
+  - bioconda::multiqc=1.7
   - pip:
     - iced==0.4.2
diff --git a/main.nf b/main.nf
index 17ff4d3..eeb6923 100644
--- a/main.nf
+++ b/main.nf
@@ -236,6 +236,7 @@ summary['Run Name']         = custom_runName ?: workflow.runName
 summary['Reads']            = params.reads
 summary['splitFastq']       = params.splitFastq
 summary['Fasta Ref']        = params.fasta
+summary['Restriction Motif']= params.restriction_site
 summary['Ligation Motif']   = params.ligation_site
 summary['DNase Mode']       = params.dnase
 summary['Remove Dup']       = params.rm_dup
@@ -311,8 +312,9 @@ process get_software_versions {
    echo $workflow.manifest.version > v_pipeline.txt
    echo $workflow.nextflow.version > v_nextflow.txt
    bowtie2 --version > v_bowtie2.txt
-   python --version > v_python.txt
+   python --version > v_python.txt 2>&1
    samtools --version > v_samtools.txt
+   multiqc --version > v_multiqc.txt
    scrape_software_versions.py &> software_versions_mqc.yaml
    """
 }