Skip to content

Commit

Permalink
add modern dna option
Browse files Browse the repository at this point in the history
  • Loading branch information
maxibor committed Oct 25, 2018
1 parent 7f45bdc commit b1d91ff
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 53 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ install:
- source activate test-environment

script:
- nextflow run main.nf -profile test_index
- nextflow run main.nf -profile test_index_adna
24 changes: 18 additions & 6 deletions bin/plotAndReport
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ def get_args():
default=None,
help="csv filename output. Default = coproid.csv"
)
parser.add_argument(
"-adna",
dest="adna",
default='true',
help="adna (true | false)"
)
parser.add_argument(
'-o',
dest="output",
Expand All @@ -47,10 +53,11 @@ def get_args():
countFile = args.countFile
identity = float(str(args.identity))
version = str(args.version)
adna = str(args.adna)
outfile = args.output
csv = args.csv

return(countFile, identity, version, outfile, csv)
return(countFile, identity, version, adna, outfile, csv)


def getBasename(file_name):
Expand Down Expand Up @@ -78,7 +85,12 @@ def write_csv(adict, CSV):


if __name__ == "__main__":
CF, ID, VERSION, OUTFILE, CSV = get_args()
CF, ID, VERSION, ADNA, OUTFILE, CSV = get_args()

if ADNA == 'true':
ADNA = True
elif ADNA == 'false':
ADNA = False

identity = ID * 100

Expand Down Expand Up @@ -211,16 +223,16 @@ if __name__ == "__main__":
d[asample]["orga2"].replace("_", "\\;") + "}\\right) = " + str(round(d[asample]["nrr"], 3)) + "$ \n\n")
fw.write(conclusion + '\n\n')
fw.write("- **MapDamage plots**\n\n")
if d[asample]["nbp1"] > 20:
if d[asample]["nbp1"] > 20 and ADNA == True:
fw.write("![MapDamage Fragmisincorporation plot for " +
orga1_clean + ". Very jagged and irregular substitutions are likely indicating spurious damages caused by a lack of reads. ](./" + asample + "." + d[asample]["orga1"] + ".fragmisincorporation_plot.png)\n\n")
else:
fw.write(
"No plot is displayed because the number of bases aligned is too low for mapDamage\n\n")
if d[asample]["nbp2"] > 20:
"\n\nNo plot is displayed because the number of bases aligned is too low for mapDamage, or it is Modern DNA\n\n")
if d[asample]["nbp2"] > 20 and ADNA == True:
fw.write("![MapDamage Fragmisincorporation plot for " +
orga2_clean + ". Very jagged and irregular substitutions are likely indicating spurious damages caused by a lack of reads. If there is not plot displayed, the aligned read count was too low for mapDamage](./" + asample + "." + d[asample]["orga2"] + ".fragmisincorporation_plot.png)\n\n")
else:
fw.write(
"No plot is displayed because the number of bases aligned is too low for mapDamage\n\n")
"\n\nNo plot is displayed because the number of bases aligned is too low for mapDamage, or it is Modern DNA\n\n")
fw.write("\n\n***\n")
123 changes: 78 additions & 45 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def helpMessage() {
--genome2 Path to candidate 2 Coprolite maker's genome fasta file (must be surrounded with quotes)
Options:
--adna Specified if data is modern (false) or ancient DNA (true). Default = ${params.adna}
--phred Specifies the fastq quality encoding (33 | 64). Defaults to ${params.phred}
--index1 Path to Bowtie2 index genome andidate 1 Coprolite maker's genome, in the form of /path/to/*.bt2 - Required if genome1 is not set
--index2 Path to Bowtie2 index genome andidate 2 Coprolite maker's genome, in the form of /path/to/*.bt2 - Required if genome2 is not set
Expand All @@ -67,11 +68,12 @@ def helpMessage() {
}


version = "0.6"
version_date = "October 11th, 2018"
version = "0.6.1"
version_date = "October 25th, 2018"

params.phred = 33

params.adna = true
params.results = "./results"
params.reads = ''
params.genome1 = ''
Expand Down Expand Up @@ -433,7 +435,8 @@ if (params.collapse == "yes"){

// 3: Checking for read PMD with PMDtools

process pmdtoolsgenome1 {
if (params.adna){
process pmdtoolsgenome1 {
tag "$name"

conda 'bioconda::pmdtools'
Expand All @@ -449,24 +452,25 @@ process pmdtoolsgenome1 {
"""
samtools view -h -F 4 $bam1 | pmdtools -t ${params.pmdscore} --header $library | samtools view -Sb - > $outfile
"""
}
}

process pmdtoolsgenome2 {
tag "$name"
process pmdtoolsgenome2 {
tag "$name"

conda 'bioconda::pmdtools'
conda 'bioconda::pmdtools'

label 'ristretto'
label 'ristretto'

input:
set val(name), file(bam2) from alignment_genome2
output:
set val(name), file("*.pmd_filtered.bam") into pmd_aligned2
script:
outfile = name+"_"+params.name2+".pmd_filtered.bam"
"""
samtools view -h -F 4 $bam2 | pmdtools -t ${params.pmdscore} --header $library | samtools view -Sb - > $outfile
"""
input:
set val(name), file(bam2) from alignment_genome2
output:
set val(name), file("*.pmd_filtered.bam") into pmd_aligned2
script:
outfile = name+"_"+params.name2+".pmd_filtered.bam"
"""
samtools view -h -F 4 $bam2 | pmdtools -t ${params.pmdscore} --header $library | samtools view -Sb - > $outfile
"""
}
}

// 4: Count aligned bp on each genome and compute ratio
Expand All @@ -479,7 +483,9 @@ process countBp{
label 'expresso'

input:
set val(name), file(bam1), file(bam2) from pmd_aligned1.join(pmd_aligned2)

set val(name), file(bam1), file(bam2) from ( params.adna ? pmd_aligned1.join(pmd_aligned2) : alignment_genome1.join(alignment_genome2))
// set val(name), file(bam1), file(bam2) from pmd_aligned1.join(pmd_aligned2)
file(genome1) from genome1Size.first()
file(genome2) from genome2Size.first()
output:
Expand All @@ -501,7 +507,8 @@ process countBp{

// 5: MapDamage

process mapdamageGenome1 {
if (params.adna){
process mapdamageGenome1 {
tag "$name"

conda 'bioconda::mapdamage2 conda-forge::imagemagick'
Expand All @@ -527,36 +534,38 @@ process mapdamageGenome1 {
mapDamage -i $align -r $fasta -d $name -t $plot_title
gs -sDEVICE=png16m -dTextAlphaBits=4 -r300 -o $fname $pdfloc
"""
}
}

process mapdamageGenome2 {
tag "$name"
process mapdamageGenome2 {
tag "$name"

conda 'bioconda::mapdamage2 conda-forge::imagemagick'
conda 'bioconda::mapdamage2 conda-forge::imagemagick'

label 'ristretto'
label 'ristretto'

errorStrategy 'ignore'
errorStrategy 'ignore'

publishDir "${params.results}/mapdamage_${orgaName}", mode: 'copy'
publishDir "${params.results}/mapdamage_${orgaName}", mode: 'copy'

input:
set val(name), file(align) from filtered_bam2
file(fasta) from genome2mapdamage.first()
output:
set val(name), file("$name/*.pdf") into mapdamagePDF_result_genome2
file("*.fragmisincorporation_plot.png") into mapdamage_result_genome2
script:
orgaName = params.name2
plot_title = name+"_"+orgaName
fname = name+"."+orgaName+".fragmisincorporation_plot.png"
pdfloc = name+"/Fragmisincorporation_plot.pdf"
"""
mapDamage -i $align -r $fasta -d $name -t $plot_title
gs -sDEVICE=png16m -dTextAlphaBits=4 -r300 -o $fname $pdfloc
"""
input:
set val(name), file(align) from filtered_bam2
file(fasta) from genome2mapdamage.first()
output:
set val(name), file("$name/*.pdf") into mapdamagePDF_result_genome2
file("*.fragmisincorporation_plot.png") into mapdamage_result_genome2
script:
orgaName = params.name2
plot_title = name+"_"+orgaName
fname = name+"."+orgaName+".fragmisincorporation_plot.png"
pdfloc = name+"/Fragmisincorporation_plot.pdf"
"""
mapDamage -i $align -r $fasta -d $name -t $plot_title
gs -sDEVICE=png16m -dTextAlphaBits=4 -r300 -o $fname $pdfloc
"""
}
}


// 6: concatenate read ratios

process concatenateRatios {
Expand Down Expand Up @@ -593,13 +602,37 @@ process proportionAndReport {
outfile = "coproID_result.md"
csvout = "coproid_result.csv"
"""
plotAndReport -c $count -i ${params.identity} -v $version -csv $csvout -o $outfile
plotAndReport -c $count -i ${params.identity} -v $version -csv $csvout -o $outfile -adna ${params.adna}
"""
}

// 8: Convert Markdown report to HTML
if (params.adna){
process md2html_adna {

conda 'conda-forge::pandoc'

process md2html {
label 'ristretto'

errorStrategy 'ignore'

publishDir "${params.results}", mode: 'copy'

input:
file(mdplot1) from mapdamage_result_genome1.collect().ifEmpty([])
file(mdplot1) from mapdamage_result_genome2.collect().ifEmpty([])
file(report) from coproidmd
file(fig) from plot
output:
file("*.html") into htmlReport
script:
outfile = "coproID_result.html"
"""
pandoc --self-contained --css $css --webtex -s $report -o $outfile
"""
}
} else {
process md2html_modern {

conda 'conda-forge::pandoc'

Expand All @@ -610,8 +643,6 @@ process md2html {
publishDir "${params.results}", mode: 'copy'

input:
file(mdplot1) from mapdamage_result_genome1.collect()
file(mdplot1) from mapdamage_result_genome2.collect()
file(report) from coproidmd
file(fig) from plot
output:
Expand All @@ -621,10 +652,12 @@ process md2html {
"""
pandoc --self-contained --css $css --webtex -s $report -o $outfile
"""
}
}




// 9: MultiQC
process multiqc {

Expand Down
29 changes: 28 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ profiles {
process.executor = 'local'
}

test_index {
test_index_adna {
process.executor = 'local'
params {
reads = 'data/reads/*.{1,2}.fastq.gz'
Expand All @@ -28,6 +28,33 @@ profiles {
}
}

test_index_modern {
process.executor = 'local'
params {
adna = false
collapse = 'no'
reads = 'data/reads/*.{1,2}.fastq.gz'
index1 = 'data/genomes/bsubtilis/Bowtie2Index/*.bt2'
index2 = 'data/genomes/ecoli/Bowtie2Index/*.bt2'
genome1 = 'data/genomes/bsubtilis/genome.fa'
genome2 = 'data/genomes/ecoli/genome.fa'
name1 = "Bacillus_subtilis"
name2 = "Escherichia_coli"
phred = 64
}
process {
withLabel : intenso {
cpus = 1
}
withLabel : expresso {
cpus = 1
}
withLabel : ristretto {
cpus = 1
}
}
}

test_genome {
process.executor = 'local'
params {
Expand Down

0 comments on commit b1d91ff

Please sign in to comment.