Skip to content

Commit

Permalink
Merge pull request #25 from itrujnara/bug_fixes
Browse files Browse the repository at this point in the history
Added MultiQC
  • Loading branch information
JoseEspinosa committed May 10, 2024
2 parents 33b2ccf + fca7756 commit 278d34f
Show file tree
Hide file tree
Showing 11 changed files with 293 additions and 14 deletions.
68 changes: 68 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,71 @@ report_section_order:
export_plots: true

disable_version_detection: true

# Modules MultiQC is asked to run: only custom_content is listed.
run_modules:
- custom_content

# Custom-content sections. Two tables are declared here: per-sample ortholog
# statistics ("sample_stats") and per-sample hit counts ("sample_hits").
custom_data:
sample_stats:
id: "sample_stats"
section_name: "Sample Stats"
plot_type: "table"
anchor: "sample_stats"
namespace: "sample_stats"
pconfig:
id: "sample_stats"
title: "Sample statistics"
sample_hits:
id: "sample_hits"
section_name: "Sample Hit Stats"
plot_type: "table"
anchor: "sample_hits"
namespace: "sample_hits"
pconfig:
id: "sample_hits"
title: "Sample hit statistics"

# Column headers for the two tables above, keyed by section id and then by
# column name. "format" is a Python format string: "{:,.3f}" prints three
# decimals, "{:,.0f}" prints a whole number, both with thousands separators.
custom_table_header_config:
sample_stats:
percent_max:
title: "Percent Consensus"
description: "Percentage of orthologs with max score."
hidden: False
format: "{:,.3f}"
percent_privates:
title: "Percent Privates"
description: "Percentage of orthologs with score 1."
hidden: False
format: "{:,.3f}"
goodness:
title: "Goodness"
description: "Goodness of the predictions (see docs for details)."
hidden: False
format: "{:,.3f}"
# One column per ortholog database.
sample_hits:
OMA:
title: "OMA"
description: "Number of orthologs found by OMA."
hidden: False
format: "{:,.0f}"
PANTHER:
title: "PANTHER"
description: "Number of orthologs found by PANTHER."
hidden: False
format: "{:,.0f}"
OrthoInspector:
title: "OrthoInspector"
description: "Number of orthologs found by OrthoInspector."
hidden: False
format: "{:,.0f}"
EggNOG:
title: "EggNOG"
description: "Number of orthologs found by EggNOG."
hidden: False
format: "{:,.0f}"

# File names associated with each section id.
# NOTE(review): presumably MultiQC search patterns ("sp"), and the names are
# expected to match the outputs of the MERGE_STATS / MERGE_HITS steps - confirm.
sp:
sample_stats:
fn: "aggregated_stats.csv"
sample_hits:
fn: "aggregated_hits.csv"
38 changes: 38 additions & 0 deletions bin/make_hits_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python3

# Written by Igor Trujnara, released under the MIT license
# See https://opensource.org/license/mit for details

import csv
import sys


def main() -> None:
    """
    Convert numbers of hits into CSV.

    Command line: ``make_hits_table.py <merged_csv> <sample_id>``

    ``merged_csv`` must have a header row. Its first column is treated as the
    row identifier and every remaining column as a database whose values are
    integer hit counts. The counts are summed per database and written to
    stdout as a two-line CSV: a header (``id`` followed by the database names)
    and one data row (``sample_id`` followed by the sums).

    A file that contains only the header yields a row of zeros.

    Exits with status 1 if fewer than two arguments are given.

    Raises:
        ValueError: if a database cell does not hold an integer.
    """
    if len(sys.argv) < 3:
        # The caller redirects stdout into the result file, so the usage
        # message must go to stderr to avoid ending up inside the CSV.
        print("Usage: python make_hits_table.py <merged_csv> <sample_id>", file=sys.stderr)
        sys.exit(1)

    sample_id = sys.argv[2]

    # newline="" is what the csv module requires for correct handling of
    # embedded newlines in quoted fields.
    with open(sys.argv[1], newline="") as f:
        reader = csv.DictReader(f)
        # Take the column names from the header instead of the first data row,
        # so an input without data rows does not raise IndexError.
        databases = list(reader.fieldnames or [])[1:]
        data = list(reader)

    # Get counts
    sums = {db: sum(int(row[db]) for row in data) for db in databases}

    # csv.writer quotes fields when needed (e.g. a sample id containing a
    # comma); "\n" keeps the line endings produced by the former print() calls.
    writer = csv.writer(sys.stdout, lineterminator="\n")

    # Print the header
    writer.writerow(["id"] + databases)

    # Print the data
    writer.writerow([sample_id] + [sums[db] for db in databases])


if __name__ == "__main__":
    main()
18 changes: 18 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,24 @@ process {
]
}

// Publishing rules for MAKE_HITS_TABLE: its outputs are published under
// <outdir>/orthologs/hits using the pipeline-wide publish mode.
withName: 'MAKE_HITS_TABLE' {
publishDir = [
path: { "${params.outdir}/orthologs/hits" },
mode: params.publish_dir_mode,
// saveAs maps versions.yml to null and leaves every other file name unchanged;
// a null result means the file is not published.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// Settings for MERGE_HITS, which is published next to the per-sample hits
// tables in <outdir>/orthologs/hits.
withName: 'MERGE_HITS' {
// Extra command-line arguments handed to the module.
// NOTE(review): presumably csvtk's "unknown value" filler, so that columns
// missing from some inputs are written as NA - confirm against csvtk concat.
ext.args = "-u NA"
// NOTE(review): presumably yields aggregated_hits.csv, the file name the
// MultiQC config looks for - confirm against the module's output naming.
ext.prefix = "aggregated_hits"
publishDir = [
path: { "${params.outdir}/orthologs/hits" },
mode: params.publish_dir_mode,
// saveAs maps versions.yml to null and leaves every other file name unchanged;
// a null result means the file is not published.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'MAKE_STATS' {
publishDir = [
path: { "${params.outdir}/orthologs/stats" },
Expand Down
8 changes: 7 additions & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ Plots representing certain aspects of the predictions are generated using `ggplo
- `orthologs/`
- `stats/`
- `*_stats.yml`: A YAML file containing ortholog statistics.
- `hits/`
- `*_hits_table.csv`: A CSV file containing hit counts per database.
</details>

The following statistics of the predictions are calculated:
Expand Down Expand Up @@ -179,9 +181,13 @@ The phylogeny can be constructed using maximum likelihood ([IQTREE](http://www.i
- `*.html`: The report in HTML format.
- `run.sh`: A script to correctly open the report.
- Other files necessary for the report.
- `multiqc/`
- `multiqc_report.html`: A MultiQC report containing a summary of all samples.
</details>

The report is generated in the form of a React application. It must be hosted on localhost to work correctly. This can be done manually or with the run script provided.
The report is generated per sample in the form of a React application. It must be hosted on localhost to work correctly. This can be done manually or with the run script provided.

A single MultiQC report is also generated. It contains a comparison of hit counts and statistics for each sample, as well as a list of the software versions used in the run.

### Pipeline information

Expand Down
41 changes: 41 additions & 0 deletions modules/local/make_hits_table.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Summarise one sample's merged ortholog CSV into a table of hit counts per
// database by running make_hits_table.py.
//
// Input:
//   tuple(meta, merged_csv) - sample meta map (meta.id is used as the sample id)
//                             and the merged CSV to summarise.
// Output:
//   hits_table - tuple(meta, <prefix>_hits_table.csv)
//   versions   - versions.yml recording the Python version used
process MAKE_HITS_TABLE {
    tag "$meta.id"
    label 'process_single'

    conda "conda-forge::python=3.11.0 conda-forge::biopython=1.83.0 conda-forge::requests=2.31.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' :
        'biocontainers/mulled-v2-bc54124b36864a4af42a9db48b90a404b5869e7e:5258b8e5ba20587b7cbf3e942e973af5045a1e59-0' }"

    input:
    tuple val(meta), path(merged_csv)

    output:
    tuple val(meta), path('*hits_table.csv'), emit: hits_table
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Declared with `def` so the variable stays local to this task, matching
    // the stub block; the output glob does not depend on it.
    def prefix = task.ext.prefix ?: meta.id
    """
    make_hits_table.py $merged_csv ${meta.id} > ${prefix}_hits_table.csv
    cat <<- END_VERSIONS > versions.yml
    "${task.process}":
        Python: \$(python3 --version | cut -d ' ' -f 2)
    END_VERSIONS
    """

    stub:
    // Creates an empty placeholder table so stub runs can proceed downstream.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_hits_table.csv
    cat <<- END_VERSIONS > versions.yml
    "${task.process}":
        Python: \$(python3 --version | cut -d ' ' -f 2)
    END_VERSIONS
    """
}
8 changes: 3 additions & 5 deletions modules/local/make_report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,10 @@ process MAKE_REPORT {
mv dist ${prefix}_dist
cat <<- END_VERSIONS > versions.yml
${task.process}:
"${task.process}":
Node: \$(node --version)
Yarn: \$(yarn --version)
React: \$(yarn info react version | cut -d \$'\n' -f 2)
Python: \$(python --version | cut -d ' ' -f 2)
React: \$(yarn info react version | awk 'NR==2{print;exit}')
END_VERSIONS
"""

Expand All @@ -69,8 +68,7 @@ process MAKE_REPORT {
${task.process}:
Node: \$(node --version)
Yarn: \$(yarn --version)
React: \$(yarn view react version)
Python: \$(python --version | cut -d ' ' -f 2)
React: \$(yarn info react version | awk 'NR==2{print;exit}')
END_VERSIONS
"""
}
2 changes: 1 addition & 1 deletion modules/local/plot_orthologs.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ process PLOT_ORTHOLOGS {
plot_orthologs.R $score_table $prefix
cat <<- END_VERSIONS > versions.yml
"${task.process}"
"${task.process}":
r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//')
END_VERSIONS
"""
Expand Down
8 changes: 8 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ params {
input = null
uniprot_query = false

// MultiQC options
multiqc_config = null
multiqc_title = null
multiqc_logo = null
max_multiqc_email_size = '25.MB'
multiqc_methods_description = null


// Ortholog options
use_all = false
local_databases = false
Expand Down
31 changes: 31 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
"fa_icon": "fas fa-envelope",
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
},
"multiqc_title": {
"type": "string",
"description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.",
"fa_icon": "fas fa-file-signature"
}
}
},
Expand Down Expand Up @@ -367,6 +372,14 @@
"fa_icon": "fas fa-remove-format",
"hidden": true
},
"max_multiqc_email_size": {
"type": "string",
"description": "File size limit when attaching MultiQC reports to summary emails.",
"pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
"default": "25.MB",
"fa_icon": "fas fa-file-upload",
"hidden": true
},
"monochrome_logs": {
"type": "boolean",
"description": "Do not use coloured log outputs.",
Expand All @@ -380,6 +393,24 @@
"help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
"hidden": true
},
"multiqc_config": {
"type": "string",
"format": "file-path",
"description": "Custom config file to supply to MultiQC.",
"fa_icon": "fas fa-cog",
"hidden": true
},
"multiqc_logo": {
"type": "string",
"description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
"fa_icon": "fas fa-image",
"hidden": true
},
"multiqc_methods_description": {
"type": "string",
"description": "Custom MultiQC yaml file containing HTML including a methods description.",
"fa_icon": "fas fa-cog"
},
"validate_params": {
"type": "boolean",
"description": "Boolean whether to validate parameters against the schema at runtime",
Expand Down
28 changes: 28 additions & 0 deletions subworkflows/local/get_orthologs.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ include { CSVTK_JOIN as MERGE_CSV } from "../../modules/nf-core/csvtk/join/
include { MAKE_SCORE_TABLE } from "../../modules/local/make_score_table"
include { FILTER_HITS } from "../../modules/local/filter_hits"
include { PLOT_ORTHOLOGS } from "../../modules/local/plot_orthologs"
include { MAKE_HITS_TABLE } from "../../modules/local/make_hits_table"
include { CSVTK_CONCAT as MERGE_HITS } from "../../modules/nf-core/csvtk/concat/main"
include { MAKE_STATS } from "../../modules/local/make_stats"
include { STATS2CSV } from "../../modules/local/stats2csv"
include { CSVTK_CONCAT as MERGE_STATS } from "../../modules/nf-core/csvtk/concat/main"
Expand Down Expand Up @@ -298,6 +300,30 @@ workflow GET_ORTHOLOGS {
.set { ch_versions }
}

// Hits

// Build a hits table for every merged CSV emitted by MERGE_CSV.
MAKE_HITS_TABLE(
MERGE_CSV.out.csv
)

ch_versions
.mix(MAKE_HITS_TABLE.out.versions)
.set { ch_versions }

// Gather the file (element 1 of each [meta, file] tuple) from all samples into
// a single list and pair it with a synthetic meta map whose id is "all".
ch_hits = MAKE_HITS_TABLE.out.hits_table
.collect { it[1] }
.map { [[id: "all"], it] }

// Concatenate the per-sample tables (MERGE_HITS is an alias of CSVTK_CONCAT).
// NOTE(review): the two "csv" arguments are presumably the input and output
// formats expected by the csvtk/concat module - confirm against the module.
MERGE_HITS(
ch_hits,
"csv",
"csv"
)

ch_versions
.mix(MERGE_HITS.out.versions)
.set { ch_versions }

// Stats

MAKE_STATS(
Expand Down Expand Up @@ -346,7 +372,9 @@ workflow GET_ORTHOLOGS {
venn_plot = ch_vennplot
jaccard_plot = ch_jaccardplot
stats = MAKE_STATS.out.stats
hits = MAKE_HITS_TABLE.out.hits_table
aggregated_stats = MERGE_STATS.out.csv
aggregated_hits = MERGE_HITS.out.csv
versions = ch_merged_versions

}
Loading

0 comments on commit 278d34f

Please sign in to comment.