Skip to content

Commit

Permalink
Merge pull request #29 from ewels/master
Browse files Browse the repository at this point in the history
Major refactoring
  • Loading branch information
ewels committed Sep 1, 2017
2 parents 29621e1 + ae3e5e7 commit 1f85e81
Show file tree
Hide file tree
Showing 15 changed files with 374 additions and 607 deletions.
16 changes: 10 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@ install:
- sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow
- cd ${TRAVIS_BUILD_DIR}/tests/

env:
- CLFLAGS="" # Basic run
- CLFLAGS="-b" # Run, build reference genome
- CLFLAGS="-r" # Run, RRBS mode
- CLFLAGS="-n" # Run, no-trimming mode
# Commented out until I figure out how to handle multiple docker images nicely
# - CLFLAGS="-p" # BWAmeth use existing ref genome
# - CLFLAGS="-p -b" # BWAmeth run, build reference genome

script:
- "./docker_test.sh" # Basic run
- "./docker_test.sh -b" # Run, build reference genome
- "./docker_test.sh -r" # Run, RRBS mode
- "./docker_test.sh -n" # Run, no-trimming mode
- "./docker_test.sh -p" # BWAmeth use existing ref genome
- "./docker_test.sh -p -b" # BWAmeth run, build reference genome
- "./run_test.sh $CLFLAGS"
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## v0.3dev
* Fix `--rrbs` mode ([#24](https://github.com/SciLifeLab/NGI-MethylSeq/issues/24))
* Fixed fairly major bug where only a single sample would run past alignment
* Merged test scripts and rewrote to use command line flags / new travis script.
* Refactored software version collection code to be more resilient and cleaner / easier to maintain.

Expand Down
34 changes: 5 additions & 29 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM openjdk:8
# openjdk:8 moved from debian jessie to stretch after u131, which breaks everything (bowtie)
FROM openjdk:8u121

LABEL authors="phil.ewels@scilifelab.se,denis.moreno@scilifelab.se" \
description="Docker image containing all requirements for NGI-MethylSeq pipeline"
Expand Down Expand Up @@ -52,12 +53,6 @@ RUN curl -fsSL https://github.com/samtools/samtools/releases/download/1.5/samtoo
make install && \
rm /opt/samtools-1.5.tar.bz2

# Install PicardTools
RUN curl -fsSL https://github.com/broadinstitute/picard/releases/download/2.0.1/picard-tools-2.0.1.zip -o /opt/picard-tools-2.0.1.zip && \
unzip /opt/picard-tools-2.0.1.zip -d /opt/ && \
rm /opt/picard-tools-2.0.1.zip
ENV PICARD_HOME /opt/picard-tools-2.0.1

# Install Bowtie2
RUN mkdir /opt/bowtie2 && \
curl -fsSL https://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.3.2/bowtie2-2.3.2-source.zip -o /opt/bowtie2/bowtie2.zip && \
Expand All @@ -82,27 +77,8 @@ RUN mkdir /opt/Qualimap && \
ln -s /opt/Qualimap/qualimap_v2.2.1/qualimap /usr/local/bin/qualimap && \
rm /opt/Qualimap/qualimap.zip

# Install BWA
RUN mkdir /opt/bwa && \
curl -fsSL https://downloads.sourceforge.net/project/bio-bwa/bwa-0.7.15.tar.bz2 -o /opt/bwa/bwa.tar.bz2 && \
tar xvjf /opt/bwa/bwa.tar.bz2 -C /opt/bwa/ && \
cd /opt/bwa/bwa-0.7.15/ && \
make && \
ln -s /opt/bwa/bwa-0.7.15/bwa /usr/local/bin/bwa && \
rm /opt/bwa/bwa.tar.bz2

# Install bwa-meth
RUN pip install toolshed && \
pip install git+git://github.com/brentp/bwa-meth.git

# Install MethylDackel
RUN mkdir /opt/MethylDackel && \
curl -fsSL https://github.com/dpryan79/MethylDackel/archive/0.2.1.zip -o /opt/MethylDackel/MethylDackel.zip && \
unzip /opt/MethylDackel/MethylDackel.zip -d /opt/MethylDackel && \
cd /opt/MethylDackel/MethylDackel-0.2.1 && \
make && \
make install prefix=/usr/local/bin && \
rm /opt/MethylDackel/MethylDackel.zip

# Install MultiQC
RUN pip install git+https://github.com/ewels/MultiQC.git

# Create UPPMAX directories for Singularity
RUN mkdir /pica /proj /sw
95 changes: 95 additions & 0 deletions Dockerfile-bwa
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Base image is held at openjdk 8u121 on purpose: openjdk:8 moved from debian
# jessie to stretch after u131, which breaks everything (bowtie).
FROM openjdk:8u121

# Image metadata: maintainer contacts and a short description of the image.
LABEL authors="phil.ewels@scilifelab.se,denis.moreno@scilifelab.se"
LABEL description="Docker image containing all requirements for NGI-MethylSeq pipeline"

# Install container-wide requirements from apt: gcc, g++, gfortran, make,
# python-dev and the development libraries listed below (bz2, curl, gsl, lzma,
# ncurses, pcre, readline, ssl, tbb, zlib).
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        gfortran \
        libbz2-dev \
        libcurl4-openssl-dev \
        libgsl0-dev \
        liblzma-dev \
        libncurses5-dev \
        libpcre3-dev \
        libreadline-dev \
        libssl-dev \
        libtbb-dev \
        make \
        python-dev \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install pip
# The generic https://bootstrap.pypa.io/get-pip.py script refuses to run on
# Python 2.7, so the 2.7-specific bootstrap script is fetched instead.
# NOTE(review): assumes `python` on this Debian jessie base is Python 2.7 — confirm.
# The bootstrap script is deleted in the same layer once pip is installed.
RUN curl -fsSL https://bootstrap.pypa.io/pip/2.7/get-pip.py -o /opt/get-pip.py && \
    python /opt/get-pip.py && \
    rm /opt/get-pip.py

# Install FastQC
# The archive is fetched over HTTPS (not plain HTTP) so it cannot be altered in
# transit. It is unpacked into /opt/FastQC, the launcher is made executable and
# linked onto the PATH, and the downloaded zip is removed in the same layer.
RUN curl -fsSL https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.5.zip -o /opt/fastqc_v0.11.5.zip && \
    unzip /opt/fastqc_v0.11.5.zip -d /opt/ && \
    chmod 755 /opt/FastQC/fastqc && \
    ln -s /opt/FastQC/fastqc /usr/local/bin/fastqc && \
    rm /opt/fastqc_v0.11.5.zip

# Install cutadapt
# --no-cache-dir stops pip from leaving its download cache in the image layer.
RUN pip install --no-cache-dir cutadapt

# Install TrimGalore 0.4.4: download the GitHub source archive, unpack it under
# /opt/TrimGalore, link the trim_galore script onto the PATH and delete the zip.
RUN mkdir /opt/TrimGalore \
    && curl -fsSL https://github.com/FelixKrueger/TrimGalore/archive/0.4.4.zip -o /opt/TrimGalore/0.4.4.zip \
    && unzip /opt/TrimGalore/0.4.4.zip -d /opt/TrimGalore \
    && ln -s /opt/TrimGalore/TrimGalore-0.4.4/trim_galore /usr/local/bin/trim_galore \
    && rm /opt/TrimGalore/0.4.4.zip

# Install SAMtools 1.5: download the release tarball, extract it under /opt,
# build and install from source, then delete the tarball in the same layer.
RUN curl -fsSL https://github.com/samtools/samtools/releases/download/1.5/samtools-1.5.tar.bz2 -o /opt/samtools-1.5.tar.bz2 \
    && tar xvjf /opt/samtools-1.5.tar.bz2 -C /opt/ \
    && cd /opt/samtools-1.5 \
    && make \
    && make install \
    && rm /opt/samtools-1.5.tar.bz2

# Install PicardTools 2.0.1: download the release zip, unpack it under /opt and
# delete the archive in the same layer.
RUN curl -fsSL https://github.com/broadinstitute/picard/releases/download/2.0.1/picard-tools-2.0.1.zip -o /opt/picard-tools-2.0.1.zip && \
    unzip /opt/picard-tools-2.0.1.zip -d /opt/ && \
    rm /opt/picard-tools-2.0.1.zip
# Expose the unpacked Picard directory to the container environment.
# Uses the key=value form; the space-separated `ENV key value` form is legacy.
ENV PICARD_HOME=/opt/picard-tools-2.0.1

# Install Qualimap v2.2.1: download the zip from Bitbucket, unpack it under
# /opt/Qualimap, link the qualimap launcher onto the PATH and delete the zip.
RUN mkdir /opt/Qualimap \
    && curl -fsSL https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.2.1.zip -o /opt/Qualimap/qualimap.zip \
    && unzip /opt/Qualimap/qualimap.zip -d /opt/Qualimap \
    && ln -s /opt/Qualimap/qualimap_v2.2.1/qualimap /usr/local/bin/qualimap \
    && rm /opt/Qualimap/qualimap.zip

# Install BWA 0.7.15: download the source tarball from SourceForge, extract it
# under /opt/bwa, compile it in place, link the resulting binary onto the PATH
# and delete the tarball in the same layer.
RUN mkdir /opt/bwa \
    && curl -fsSL https://downloads.sourceforge.net/project/bio-bwa/bwa-0.7.15.tar.bz2 -o /opt/bwa/bwa.tar.bz2 \
    && tar xvjf /opt/bwa/bwa.tar.bz2 -C /opt/bwa/ \
    && cd /opt/bwa/bwa-0.7.15/ \
    && make \
    && ln -s /opt/bwa/bwa-0.7.15/bwa /usr/local/bin/bwa \
    && rm /opt/bwa/bwa.tar.bz2

# Install bwa-meth
# toolshed is installed first, then bwa-meth straight from its GitHub repository.
# The repository is fetched over HTTPS: the unauthenticated git:// protocol is
# no longer served by GitHub, so a git+git:// URL fails to clone.
# --no-cache-dir stops pip from leaving its download cache in the image layer.
RUN pip install --no-cache-dir toolshed && \
    pip install --no-cache-dir git+https://github.com/brentp/bwa-meth.git

# Install MethylDackel 0.2.1: download the GitHub source archive, unpack it
# under /opt/MethylDackel, build it, install with prefix=/usr/local/bin and
# delete the zip in the same layer.
RUN mkdir /opt/MethylDackel \
    && curl -fsSL https://github.com/dpryan79/MethylDackel/archive/0.2.1.zip -o /opt/MethylDackel/MethylDackel.zip \
    && unzip /opt/MethylDackel/MethylDackel.zip -d /opt/MethylDackel \
    && cd /opt/MethylDackel/MethylDackel-0.2.1 \
    && make \
    && make install prefix=/usr/local/bin \
    && rm /opt/MethylDackel/MethylDackel.zip

# Install MultiQC
# --no-cache-dir stops pip from leaving its download cache in the image layer.
RUN pip install --no-cache-dir multiqc

# Create UPPMAX directories for Singularity
# NOTE(review): presumably these empty top-level directories act as bind-mount
# points for the UPPMAX cluster filesystems when the image runs under
# Singularity — confirm.
RUN mkdir /pica /proj /sw
7 changes: 3 additions & 4 deletions bismark.nf
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ if( params.bismark_index ){
bismark_index = Channel
.fromPath(params.bismark_index)
.ifEmpty { exit 1, "Bismark index not found: ${params.bismark_index}" }
makeBismarkIndex_stderr = Channel.empty()
}
else if ( params.fasta ){
fasta = file(params.fasta)
Expand Down Expand Up @@ -279,8 +278,8 @@ process bismark_align {
}

input:
file index from bismark_index
set val(name), file(reads) from trimmed_reads
file index from bismark_index.collect()

output:
file "*.bam" into bam, bam_2
Expand Down Expand Up @@ -523,7 +522,7 @@ process multiqc {
file ('bismark/*') from bismark_reports_results.collect()
file ('bismark/*') from bismark_summary_results.collect()
file ('qualimap/*') from qualimap_results.collect()
file ('software_versions/*') from software_versions_yaml
file ('software_versions/*') from software_versions_yaml.collect()

output:
file "*_report.html" into multiqc_report
Expand Down Expand Up @@ -569,7 +568,7 @@ workflow.onComplete {
if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId
if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision
if(workflow.container) email_fields['summary']['Docker image'] = workflow.container
email_fields['summary']['Nextflow Version'] = nextflow.version
email_fields['summary']['Nextflow Version'] = workflow.nextflow.version
email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp

Expand Down
152 changes: 7 additions & 145 deletions conf/aws.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,153 +16,15 @@ docker {
process {
container = 'scilifelab/ngi-methylseq'
executor = 'ignite'
time = { 2.h * task.attempt }
errorStrategy = { task.exitStatus == 143 ? 'retry' : 'finish' }
maxRetries = 3
maxErrors = '-1'


errorStrategy = { ( task.exitStatus == 143 || task.exitStatus == 137 ) ? 'retry' : 'finish' }
maxRetries = 3
maxErrors = '-1'

// Process resource requirements
$makeBismarkIndex {
time = { 36.h * task.attempt }
}
$fastqc {
errorStrategy = { ( task.exitStatus == 143 || task.exitStatus == 137 ) ? 'retry' : 'ignore' }
}
$trim_galore {
time = { 12.h * task.attempt }
}
$bismark_align {
time = { 36.h * task.attempt }
}
$bismark_deduplicate {
time = { 12.h * task.attempt }
}
$bismark_methXtract {
time = { 8.h * task.attempt }
}
$bismark_report {
errorStrategy = { ( task.exitStatus == 143 || task.exitStatus == 137 ) ? 'retry' : 'ignore' }
}
$bismark_summary {
errorStrategy = { ( task.exitStatus == 143 || task.exitStatus == 137 ) ? 'retry' : 'ignore' }
}
$qualimap {
time = { 6.h * task.attempt }
errorStrategy = { ( task.exitStatus == 143 || task.exitStatus == 137 ) ? 'retry' : 'ignore' }
}
$get_software_versions {
errorStrategy = 'ignore'
}
$multiqc {
errorStrategy = { ( task.exitStatus == 143 || task.exitStatus == 137 ) ? 'retry' : 'ignore' }
}

$bwamem_align {
time = { 24.h * task.attempt }
}
$samtools_sort {
time = { 8.h * task.attempt }
}

}

params {
saveReference = true
// illumina iGenomes reference file paths on AWS
genomes {
'GRCh37' {
bismark = 's3://ngi-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta'
}
'GRCm38' {
bismark = 's3://ngi-igenomes/igenomes/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta'
}
'TAIR10' {
bismark = 's3://ngi-igenomes/igenomes/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta'
}
'EB2' {
bismark = 's3://ngi-igenomes/igenomes/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta'
}
'UMD3.1' {
bismark = 's3://ngi-igenomes/igenomes/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta'
}
'WBcel235' {
bismark = 's3://ngi-igenomes/igenomes/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta'
}
'CanFam3.1' {
bismark = 's3://ngi-igenomes/igenomes/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta'
}
'GRCz10' {
bismark = 's3://ngi-igenomes/igenomes/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta'
}
'BDGP6' {
bismark = 's3://ngi-igenomes/igenomes/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta'
}
'EquCab2' {
bismark = 's3://ngi-igenomes/igenomes/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta'
}
'EB1' {
bismark = 's3://ngi-igenomes/igenomes/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta'
}
'Galgal4' {
bismark = 's3://ngi-igenomes/igenomes/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta'
}
'Gm01' {
bismark = 's3://ngi-igenomes/igenomes/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta'
}
'Mmul_1' {
bismark = 's3://ngi-igenomes/igenomes/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta'
}
'IRGSP-1.0' {
bismark = 's3://ngi-igenomes/igenomes/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta'
}
'CHIMP2.1.4' {
bismark = 's3://ngi-igenomes/igenomes/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta'
}
'Rnor_6.0' {
bismark = 's3://ngi-igenomes/igenomes/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta'
}
'R64-1-1' {
bismark = 's3://ngi-igenomes/igenomes/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta'
}
'EF2' {
bismark = 's3://ngi-igenomes/igenomes/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta'
}
'Sbi1' {
bismark = 's3://ngi-igenomes/igenomes/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta'
}
'Sscrofa10.2' {
bismark = 's3://ngi-igenomes/igenomes/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta'
}
'AGPv3' {
bismark = 's3://ngi-igenomes/igenomes/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex'
fasta = 's3://ngi-igenomes/igenomes/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta'
}
}

igenomes_base = 's3://ngi-igenomes/igenomes/'
// Max resources based on a typical AWS m4.4xlarge EC2 instance
// Customise with --max_memory, --max_cpus and --max_time (or overwrite with another config file)
// For example, for m4.2xlarge, use `--max_cpus 8 --max_memory 32`
max_memory = 64.GB
max_cpus = 16
max_time = 240.h
}
Loading

0 comments on commit 1f85e81

Please sign in to comment.