Merge pull request #703 from nf-core/dev

2.3.2 patch release
nf-core · Mar 16, 2021 · b5a8f0f · b5a8f0f
2 parents 29b6e14 + ab63956
commit b5a8f0f
Show file tree

Hide file tree

Showing 20 changed files with 1,350 additions and 789 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       matrix:
         # Nextflow versions: check pipeline minimum and current latest
-        nxf_ver: ['20.04.0', '']
+        nxf_ver: ['20.07.1', '']
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
@@ -34,13 +34,13 @@ jobs:
 
       - name: Build new docker image
         if: env.MATCHED_FILES
-        run: docker build --no-cache . -t nfcore/eager:2.3.1
+        run: docker build --no-cache . -t nfcore/eager:2.3.2
 
       - name: Pull docker image
         if: ${{ !env.MATCHED_FILES }}
         run: |
           docker pull nfcore/eager:dev
-          docker tag nfcore/eager:dev nfcore/eager:2.3.1
+          docker tag nfcore/eager:dev nfcore/eager:2.3.2
 
       - name: Install Nextflow
         env:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,40 @@
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
+## [2.3.2] - 2021-03-16
+
+### `Added`
+
+- [#687](https://github.com/nf-core/eager/pull/687) - Adds Kraken2 unique kmer counting report
+- [#676](https://github.com/nf-core/eager/issues/676) - Refactor help message / summary message formatting to automatic versions using nf-core library
+- [#682](https://github.com/nf-core/eager/issues/682) - Add AdapterRemoval `--qualitymax` flag to allow a maximum FASTQ Phred score higher than 41
+
+### `Fixed`
+
+- [#666](https://github.com/nf-core/eager/issues/666) - Fixed input file staging for `print_nuclear_contamination`
+- [#631](https://github.com/nf-core/eager/issues/631) - Update minimum Nextflow version to 20.07.1, due to an unfortunate bug in Nextflow 20.04.1 causing eager to crash if the patch is pulled
+- Made MultiQC crash behaviour stricter when dealing with large datasets, as reported by @ashildv
+- [#652](https://github.com/nf-core/eager/issues/652) - Added note to documentation that when using `--skip_collapse` this will use _paired-end_ alignment mode with mappers when using PE data
+- [#626](https://github.com/nf-core/eager/issues/626) - Add additional checks to ensure pipeline will give useful error if cells of a TSV column are empty
+- Added note to documentation that when using `--skip_collapse` this will use _paired-end_ alignment mode with mappers when using PE data
+- [#673](https://github.com/nf-core/eager/pull/673) - Fix Kraken database loading when loading from directory instead of compressed file
+- [#688](https://github.com/nf-core/eager/issues/668) - Allow pipeline to complete, even if Qualimap crashes due to an empty or corrupt BAM file for one sample/library
+- [#683](https://github.com/nf-core/eager/pull/683) - Sets `--igenomes_ignore` to true by default, as rarely used by users currently and makes resolving configs less complex
+- Added exit code `140` to the list of retryable exit codes to account for certain scheduler wall-time limit failures
+- [#672](https://github.com/nf-core/eager/issues/672) - Removed java parameter from picard tools which could cause memory issues
+- [#679](https://github.com/nf-core/eager/issues/679) - Refactor within-process bash conditions to groovy/nextflow, due to incompatibility with some server environments
+- [#690](https://github.com/nf-core/eager/pull/690) - Fixed ANGSD output mode for beagle by setting `-doMajorMinor 1` as default in that case
+- [#693](https://github.com/nf-core/eager/issues/693) - Fixed broken TSV input validation for the Colour Chemistry column
+- [#695](https://github.com/nf-core/eager/issues/695) - Fixed incorrect `-profile` order in tutorials (originally written reversed due to [nextflow bug](https://github.com/nextflow-io/nextflow/issues/1792))
+- [#653](https://github.com/nf-core/eager/issues/653) - Fixed file collision errors with sexdeterrmine for two same-named libraries with different strandedness
+
+### `Dependencies`
+
+- Bumped MultiQC to 1.10 for improved functionality
+- Bumped HOPS to 0.35 for MultiQC 1.10 compatibility
+
+### `Deprecated`
+
 ## [2.3.1] - 2021-01-14
 
 ### `Added`

diff --git a/Dockerfile b/Dockerfile
@@ -7,10 +7,10 @@ COPY environment.yml /
 RUN conda env create --quiet -f /environment.yml && conda clean -a
 
 # Add conda installation dir to PATH (instead of doing 'conda activate')
-ENV PATH /opt/conda/envs/nf-core-eager-2.3.1/bin:$PATH
+ENV PATH /opt/conda/envs/nf-core-eager-2.3.2/bin:$PATH
 
 # Dump the details of the installed packages to a file for posterity
-RUN conda env export --name nf-core-eager-2.3.1 > nf-core-eager-2.3.1.yml
+RUN conda env export --name nf-core-eager-2.3.2 > nf-core-eager-2.3.2.yml
 
 # Instruct R processes to use these empty files instead of clashing with a local version
 RUN touch .Rprofile

diff --git a/README.md b/README.md
@@ -1,10 +1,10 @@
-# ![nf-core/eager](docs/images/nf-core-eager_logo.png)
+# ![nf-core/eager](docs/images/nf-core_eager_logo.png)
 
 **A fully reproducible and state-of-the-art ancient DNA analysis pipeline**.
 
 [![GitHub Actions CI Status](https://github.com/nf-core/eager/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/eager/actions)
 [![GitHub Actions Linting Status](https://github.com/nf-core/eager/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/eager/actions)
-[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.04.0-brightgreen.svg)](https://www.nextflow.io/)
+[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.07.1-brightgreen.svg)](https://www.nextflow.io/)
 [![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/)
 [![DOI](https://zenodo.org/badge/135918251.svg)](https://zenodo.org/badge/latestdoi/135918251)
 
@@ -158,7 +158,10 @@ of this pipeline:
 
 Those who have provided conceptual guidance, suggestions, bug reports etc.
 
+* [Alexandre Gilardet](https://github.com/alexandregilardet)
 * Arielle Munters
+* [Charles Plessy](https://github.com/charles-plessy)
+* [Åshild Vågene](https://github.com/ashildv)
 * [Hester van Schalkwyk](https://github.com/hesterjvs)
 * [Ido Bar](https://github.com/IdoBar)
 * [Irina Velsko](https://github.com/ivelsko)
@@ -184,7 +187,8 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
 ## Citations
 
 If you use `nf-core/eager` for your analysis, please cite the `eager` preprint as follows:
-> James A. Fellows Yates, Thiseas Christos Lamnidis, Maxime Borry, Aida Andrades Valtueña, Zandra Fagneräs, Stephen Clayton, Maxime U. Garcia, Judith Neukamm, Alexander Peltzer **Reproducible, portable, and efficient ancient genome reconstruction with nf-core/eager** bioRxiv 2020.06.11.145615; [doi: https://doi.org/10.1101/2020.06.11.145615](https://doi.org/10.1101/2020.06.11.145615)
+
+> Fellows Yates JA, Lamnidis TC, Borry M, Andrades Valtueña A, Fagernäs Z, Clayton S, Garcia MU, Neukamm J, Peltzer A. 2021. Reproducible, portable, and efficient ancient genome reconstruction with nf-core/eager. PeerJ 9:e10947. DOI: [10.7717/peerj.10947](https://doi.org/10.7717/peerj.10947).
 
 You can cite the eager zenodo record for a specific version using the following [doi: 10.5281/zenodo.3698082](https://zenodo.org/badge/latestdoi/135918251)
 

diff --git a/bin/kraken_parse.py b/bin/kraken_parse.py
@@ -19,18 +19,24 @@ def _get_args():
         default=50,
         help="Minimum number of hits on clade to report it. Default = 50")
     parser.add_argument(
-        '-o',
-        dest="output",
+        '-or',
+        dest="readout",
         default=None,
-        help="Output file. Default = <basename>.kraken_parsed.csv")
+        help="Read count output file. Default = <basename>.read_kraken_parsed.csv")
+    parser.add_argument(
+        '-ok',
+        dest="kmerout",
+        default=None,
+        help="Kmer Output file. Default = <basename>.kmer_kraken_parsed.csv")
 
     args = parser.parse_args()
 
     infile = args.krakenReport
     countlim = int(args.count)
-    outfile = args.output
+    readout = args.readout
+    kmerout = args.kmerout
 
-    return(infile, countlim, outfile)
+    return(infile, countlim, readout, kmerout)
 
 
 def _get_basename(file_name):
@@ -51,14 +57,23 @@ def parse_kraken(infile, countlim):
 
     '''
     with open(infile, 'r') as f:
-        resdict = {}
+        read_dict = {}
+        kmer_dict = {}
         csvreader = csv.reader(f, delimiter='\t')
         for line in csvreader:
             reads = int(line[1])
             if reads >= countlim:
-                taxid = line[4]
-                resdict[taxid] = reads
-        return(resdict)
+                taxid = line[6]
+                kmer = line[3]
+                unique_kmer = line[4]
+                try:
+                    kmer_duplicity = float(kmer)/float(unique_kmer)
+                except ZeroDivisionError:
+                    kmer_duplicity = 0
+                read_dict[taxid] = reads
+                kmer_dict[taxid] = kmer_duplicity
+
+        return(read_dict, kmer_dict)
 
 
 def write_output(resdict, infile, outfile):
@@ -70,10 +85,17 @@ def write_output(resdict, infile, outfile):
 
 
 if __name__ == '__main__':
-    INFILE, COUNTLIM, outfile = _get_args()
+    INFILE, COUNTLIM, readout, kmerout = _get_args()
 
-    if not outfile:
-        outfile = _get_basename(INFILE)+".kraken_parsed.csv"
+    if not readout:
+        read_outfile = _get_basename(INFILE)+".read_kraken_parsed.csv"
+    else:
+        read_outfile = readout
+    if not kmerout:    
+        kmer_outfile = _get_basename(INFILE)+".kmer_kraken_parsed.csv"
+    else:
+        kmer_outfile = kmerout
 
-    tmp_dict = parse_kraken(infile=INFILE, countlim=COUNTLIM)
-    write_output(resdict=tmp_dict, infile=INFILE, outfile=outfile)
+    read_dict, kmer_dict = parse_kraken(infile=INFILE, countlim=COUNTLIM)
+    write_output(resdict=read_dict, infile=INFILE, outfile=read_outfile)
+    write_output(resdict=kmer_dict, infile=INFILE, outfile=kmer_outfile)
diff --git a/bin/merge_kraken_res.py b/bin/merge_kraken_res.py
@@ -15,21 +15,29 @@ def _get_args():
         formatter_class=argparse.RawDescriptionHelpFormatter,
         description='Merging csv count files in one table')
     parser.add_argument(
-        '-o',
-        dest="output",
-        default="kraken_count_table.csv",
-        help="Output file. Default = kraken_count_table.csv")
+        '-or',
+        dest="readout",
+        default="kraken_read_count_table.csv",
+        help="Read count output file. Default = kraken_read_count_table.csv")
+    parser.add_argument(
+        '-ok',
+        dest="kmerout",
+        default="kraken_kmer_unicity_table.csv",
+        help="Kmer unicity output file. Default = kraken_kmer_unicity_table.csv")
 
     args = parser.parse_args()
 
-    outfile = args.output
+    readout = args.readout
+    kmerout = args.kmerout
 
-    return(outfile)
+    return(readout, kmerout)
 
 
 def get_csv():
     tmp = [i for i in os.listdir() if ".csv" in i]
-    return(tmp)
+    kmer = [i for i in tmp if '.kmer_' in i]
+    read = [i for i in tmp if '.read_' in i]
+    return(read, kmer)
 
 
 def _get_basename(file_name):
@@ -54,8 +62,9 @@ def write_csv(pd_dataframe, outfile):
 
 
 if __name__ == "__main__":
-    OUTFILE = _get_args()
-    all_csv = get_csv()
-    resdf = merge_csv(all_csv)
-    write_csv(resdf, OUTFILE)
-    print(resdf)
+    READOUT, KMEROUT = _get_args()
+    reads, kmers = get_csv()
+    read_df = merge_csv(reads)
+    kmer_df = merge_csv(kmers)
+    write_csv(read_df, READOUT)
+    write_csv(kmer_df, KMEROUT)
diff --git a/conf/base.config b/conf/base.config
@@ -14,7 +14,7 @@ process {
   memory = { check_max( 7.GB * task.attempt, 'memory' ) }
   time = { check_max( 24.h * task.attempt, 'time' ) }
 
-  errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
+  errorStrategy = { task.exitStatus in [143,137,104,134,139, 140] ? 'retry' : 'finish' }
   maxRetries = 3
   maxErrors = '-1'
 
@@ -74,38 +74,34 @@ process {
   }
 
   withName:qualimap{
-    errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }
+    errorStrategy = { task.exitStatus in [1,143,137,104,134,139, 140] ? 'retry' : task.exitStatus in [255] ? 'ignore' : 'finish' }
   }
 
   withName:preseq {
     errorStrategy = 'ignore'
   }
 
   withName:damageprofiler {
-    errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' }
+    errorStrategy = { task.exitStatus in [1,143,137,104,134,139, 140] ? 'retry' : 'finish' }
   }
 
   // Add 1 retry for certain java tools as not enough heap space java errors gives exit code 1
   withName: dedup {
-    errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' } 
+    errorStrategy = { task.exitStatus in [1,143,137,104,134,139, 140] ? 'retry' : 'finish' } 
   }
 
   withName: markduplicates {
-    errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } 
+    errorStrategy = { task.exitStatus in [143,137, 140] ? 'retry' : 'finish' } 
   }
 
   // Add 1 retry as not enough heapspace java error gives exit code 1
   withName: malt {
-    errorStrategy = { task.exitStatus in [1,143,137,104,134,139] ? 'retry' : 'finish' } 
+    errorStrategy = { task.exitStatus in [1,143,137,104,134,139, 140] ? 'retry' : 'finish' } 
   }
 
   // other process specific exit statuses
   withName: nuclear_contamination {
-    errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'ignore' : 'retry' }
-  }
-
-  withName: multiqc {
-    errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' }
+    errorStrategy = { task.exitStatus in [143,137,104,134,139, 140] ? 'ignore' : 'retry' }
   }
 
 }

diff --git a/docs/images/tutorials/profiles/config_profile_inheritence.png b/docs/images/tutorials/profiles/config_profile_inheritence.png
diff --git a/docs/images/tutorials/profiles/config_profile_inheritence.svg b/docs/images/tutorials/profiles/config_profile_inheritence.svg