From 24915b61df95e7792965e29615d6e69886af8ed0 Mon Sep 17 00:00:00 2001 From: fxwiegand Date: Tue, 8 Jun 2021 12:25:41 +0200 Subject: [PATCH 01/21] Add new subcommand plot-bam --- src/bam/mod.rs | 1 + src/bam/plot/bam_plot.html.tera | 44 ++++++++++++++ src/bam/plot/mod.rs | 1 + src/bam/plot/plot_bam.rs | 57 +++++++++++++++++++ .../table_report/create_report_table.rs | 14 ++--- src/bcf/report/table_report/static_reader.rs | 26 ++++++--- src/cli.rs | 28 ++++++++- src/main.rs | 18 ++++++ 8 files changed, 174 insertions(+), 15 deletions(-) create mode 100644 src/bam/plot/bam_plot.html.tera create mode 100644 src/bam/plot/mod.rs create mode 100644 src/bam/plot/plot_bam.rs diff --git a/src/bam/mod.rs b/src/bam/mod.rs index e5d400f2..a2238a16 100644 --- a/src/bam/mod.rs +++ b/src/bam/mod.rs @@ -1,3 +1,4 @@ //! Tools that work on BAM files pub mod collapse_reads_to_fragments; pub mod depth; +pub mod plot; diff --git a/src/bam/plot/bam_plot.html.tera b/src/bam/plot/bam_plot.html.tera new file mode 100644 index 00000000..4949dbb8 --- /dev/null +++ b/src/bam/plot/bam_plot.html.tera @@ -0,0 +1,44 @@ + + + + BAM Plot for {{ bam }} + + + + + + + + + + + +
+ +
+ + + diff --git a/src/bam/plot/mod.rs b/src/bam/plot/mod.rs new file mode 100644 index 00000000..53eebc8d --- /dev/null +++ b/src/bam/plot/mod.rs @@ -0,0 +1 @@ +pub mod plot_bam; diff --git a/src/bam/plot/plot_bam.rs b/src/bam/plot/plot_bam.rs new file mode 100644 index 00000000..cdd29452 --- /dev/null +++ b/src/bam/plot/plot_bam.rs @@ -0,0 +1,57 @@ +use crate::bcf::report::table_report::create_report_table::create_report_data; +use crate::bcf::report::table_report::create_report_table::manipulate_json; +use anyhow::Result; +use chrono::{DateTime, Local}; +use itertools::Itertools; +use std::fs::File; +use std::io::Write; +use std::path::Path; +use std::str::FromStr; +use tera::{Context, Tera}; + +pub(crate) fn plot_bam( + bam_path: &str, + fasta_path: &str, + region: &str, + max_read_depth: u32, + output_path: &str, +) -> Result<()> { + let splitted_region = region.split(":").collect_vec(); + let chrom = splitted_region[0]; + let span = splitted_region[1].split("-").collect_vec(); + assert_eq!(2, span.len()); + let start = u64::from_str(span[0])?; + let end = u64::from_str(span[1])?; + + let (content, max_rows) = create_report_data( + Path::new(fasta_path), + None, + Path::new(bam_path), + chrom.to_owned(), + start, + end, + max_read_depth, + )?; + let visualization = manipulate_json(content, start, end, max_rows)?; + + dbg!(&visualization); + + let mut templates = Tera::default(); + templates.add_raw_template("bam_plot.html.tera", include_str!("bam_plot.html.tera"))?; + let mut context = Context::new(); + let local: DateTime = Local::now(); + context.insert("time", &local.format("%a %b %e %T %Y").to_string()); + context.insert("version", &env!("CARGO_PKG_VERSION")); + context.insert("specs", &visualization); + context.insert("bam", &bam_path.to_string()); + context.insert("chrom", &chrom); + context.insert("start", &start); + context.insert("end", &end); + + let html = templates.render("bam_plot.html.tera", &context)?; + let filepath = Path::new(output_path).join("plot.html"); + let mut file = File::create(filepath)?; + file.write_all(html.as_bytes())?; + + Ok(()) +} diff --git a/src/bcf/report/table_report/create_report_table.rs b/src/bcf/report/table_report/create_report_table.rs index 8db7162e..3a6eb4a1 100644 --- a/src/bcf/report/table_report/create_report_table.rs +++ b/src/bcf/report/table_report/create_report_table.rs @@ -325,7 +325,7 @@ pub(crate) fn make_table_report( if pos < 75 { let (content, max_rows) = create_report_data( fasta_path, - var.clone(), + Some(var.clone()), bam_path, chrom.clone(), 0, @@ -337,7 +337,7 @@ pub(crate) fn make_table_report( } else if pos + 75 >= fasta_length as i64 { let (content, max_rows) = create_report_data( fasta_path, - var.clone(), + Some(var.clone()), bam_path, chrom.clone(), pos as u64 - 75, @@ -349,7 +349,7 @@ pub(crate) fn make_table_report( } else { let (content, max_rows) = create_report_data( fasta_path, - var.clone(), + Some(var.clone()), bam_path, chrom.clone(), pos as u64 - 75, @@ -490,9 +490,9 @@ pub(crate) fn read_tag_entries( Ok(()) } -fn create_report_data( +pub(crate) fn create_report_data( fasta_path: &Path, - variant: Variant, + variant: Option, bam_path: &Path, chrom: String, from: u64, @@ -513,7 +513,7 @@ fn create_report_data( from, to, max_read_depth, - &variant, + variant.as_ref(), )?; for b in bases { @@ -533,7 +533,7 @@ fn create_report_data( /// Inserts the json containing the genome data into the vega specs. /// It also changes keys and values of the json data for the vega plot to look better and compresses the json with jsonm. -fn manipulate_json(data: Json, from: u64, to: u64, max_rows: usize) -> Result { +pub(crate) fn manipulate_json(data: Json, from: u64, to: u64, max_rows: usize) -> Result { let json_string = include_str!("vegaSpecs.json"); let mut vega_specs: Value = serde_json::from_str(&json_string)?; diff --git a/src/bcf/report/table_report/static_reader.rs b/src/bcf/report/table_report/static_reader.rs index 3144937c..734b754e 100644 --- a/src/bcf/report/table_report/static_reader.rs +++ b/src/bcf/report/table_report/static_reader.rs @@ -39,7 +39,7 @@ fn calc_rows( reads: Vec, matches: Vec, max_read_depth: u32, - variant: &Variant, + variant: Option<&Variant>, ) -> ( Vec, Vec, @@ -55,8 +55,14 @@ fn calc_rows( let mut max_row = 0; for r in matches { - if r.read_start < variant.start_position as u32 && r.read_end > variant.end_position as u32 - { + let overlaps = if variant.is_some() { + r.read_start < variant.unwrap().start_position as u32 + && r.read_end > variant.unwrap().end_position as u32 + } else { + true + }; + + if overlaps { let mut row: u16 = 0; if read_names.contains_key(&r.name) { @@ -85,8 +91,14 @@ fn calc_rows( } for r in reads { - if r.read_start < variant.start_position as u32 && r.read_end > variant.end_position as u32 - { + let overlaps = if variant.is_some() { + r.read_start < variant.unwrap().start_position as u32 + && r.read_end > variant.unwrap().end_position as u32 + } else { + true + }; + + if overlaps { let mut row: u16 = 0; if read_names.contains_key(&r.name) { @@ -141,7 +153,7 @@ pub fn get_static_reads( from: u64, to: u64, max_read_depth: u32, - variant: &Variant, + variant: Option<&Variant>, ) -> Result<( Vec, Vec, @@ -149,5 +161,5 @@ pub fn get_static_reads( )> { let alignments = read_indexed_bam(path, chrom.clone(), from, to)?; let (msm, m) = make_nucleobases(fasta_path, chrom, alignments, from, to)?; - Ok(calc_rows(msm, m, max_read_depth, &variant)) + Ok(calc_rows(msm, m, max_read_depth, variant)) } diff --git a/src/cli.rs b/src/cli.rs index 7fc143bb..91cc6e2b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -215,6 +215,32 @@ pub(crate) enum Command { output_path: String, }, + /// Creates a html file with a vega visualization of the given bam region + /// Example: + /// rbt plot-bam input.bam 2:132424-132924 + #[structopt(author = "Felix Wiegand ")] + PlotBam { + /// BAM file to be visualized. + #[structopt()] + bam_path: String, + + /// Path to the reference fasta file. + #[structopt(long, short = "r", default_value = "100")] + reference: String, + + /// Chromosome and region for the visualization. Example: 2:132424-132924 + #[structopt()] + region: String, + + /// Set the maximum lines of reads that will be shown in the alignment plots. Default value is 500. + #[structopt(long, short = "d", default_value = "500")] + max_read_depth: u32, + + /// Relative output path for the html file. Default value is the current directory. + #[structopt(default_value = ".")] + output_path: String, + }, + /// Creates report from a given VCF file including a visual plot /// for every variant with the given BAM and FASTA file. /// The VCF file has to be annotated with VEP, using the options --hgvs and --hgvsg. @@ -246,7 +272,7 @@ pub(crate) enum Command { #[structopt(long, short = "c", default_value = "1000")] cells: u32, - /// Set the maximum number of cells in the oncoprint per page. Lowering max-cells should improve the performance of the plots in the browser. Default value is 1000. + /// Set the maximum lines of reads that will be shown in the alignment plots. Default value is 500. #[structopt(long, short = "d", default_value = "500")] max_read_depth: u32, diff --git a/src/main.rs b/src/main.rs index 336d9492..a9904f2f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -113,6 +113,24 @@ fn main() -> Result<()> { pin_until.as_deref(), )? } + PlotBam { + bam_path, + reference, + region, + max_read_depth, + output_path, + } => { + if !Path::new(&output_path).exists() { + fs::create_dir_all(Path::new(&output_path))?; + } + bam::plot::plot_bam::plot_bam( + &bam_path, + &reference, + ®ion, + max_read_depth, + &output_path, + )? + } VcfReport { fasta, vcfs, From 5634b49e85b663df3b0070b77175f3f84a8a5fbf Mon Sep 17 00:00:00 2001 From: fxwiegand Date: Wed, 9 Jun 2021 12:18:33 +0200 Subject: [PATCH 02/21] Minor fixes --- src/bam/plot/bam_plot.html.tera | 25 +++++++++++++------ src/bam/plot/plot_bam.rs | 2 +- .../table_report/create_report_table.rs | 6 +++-- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/bam/plot/bam_plot.html.tera b/src/bam/plot/bam_plot.html.tera index 4949dbb8..057108e1 100644 --- a/src/bam/plot/bam_plot.html.tera +++ b/src/bam/plot/bam_plot.html.tera @@ -3,14 +3,19 @@ BAM Plot for {{ bam }} + + + + + + + + - + - - - - +
-
diff --git a/src/bam/plot/plot_bam.rs b/src/bam/plot/plot_bam.rs index cdd29452..69fb108a 100644 --- a/src/bam/plot/plot_bam.rs +++ b/src/bam/plot/plot_bam.rs @@ -43,7 +43,7 @@ pub(crate) fn plot_bam( context.insert("time", &local.format("%a %b %e %T %Y").to_string()); context.insert("version", &env!("CARGO_PKG_VERSION")); context.insert("specs", &visualization); - context.insert("bam", &bam_path.to_string()); + context.insert("bam", &Path::new(bam_path).iter().last().unwrap().to_str().unwrap()); context.insert("chrom", &chrom); context.insert("start", &start); context.insert("end", &end); diff --git a/src/bcf/report/table_report/create_report_table.rs b/src/bcf/report/table_report/create_report_table.rs index 3a6eb4a1..02634f49 100644 --- a/src/bcf/report/table_report/create_report_table.rs +++ b/src/bcf/report/table_report/create_report_table.rs @@ -526,7 +526,9 @@ pub(crate) fn create_report_data( data.push(mat); } - data.push(json!(variant)); + if variant.is_some() { + data.push(json!(variant)); + } Ok((Json::from_str(&json!(data).to_string()).unwrap(), max_rows)) } @@ -545,7 +547,7 @@ pub(crate) fn manipulate_json(data: Json, from: u64, to: u64, max_rows: usize) - for (i, _) in v.iter().enumerate() { let k = v[i]["marker_type"].clone().as_str().unwrap().to_owned(); - if k == "A" || k == "T" || k == "G" || k == "C" || k == "U" { + if k == "A" || k == "T" || k == "G" || k == "C" || k == "U" || k == "N" { values["values"][i]["base"] = values["values"][i]["marker_type"].clone(); } else if k == "Deletion" || k == "Match" From 0aff4f90163d81a4eaaac5cc00b4ed86d6cc7e82 Mon Sep 17 00:00:00 2001 From: fxwiegand Date: Wed, 9 Jun 2021 12:32:27 +0200 Subject: [PATCH 03/21] Visual updates --- src/bam/plot/bam_plot.html.tera | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/bam/plot/bam_plot.html.tera b/src/bam/plot/bam_plot.html.tera index 057108e1..6122970b 100644 --- a/src/bam/plot/bam_plot.html.tera +++ b/src/bam/plot/bam_plot.html.tera @@ -39,7 +39,7 @@ -
+
From e08a5e3b72f770af2336cbcc2f93c6f3ac17ea9b Mon Sep 17 00:00:00 2001 From: fxwiegand Date: Wed, 9 Jun 2021 12:51:29 +0200 Subject: [PATCH 04/21] Remove debug --- src/bam/plot/plot_bam.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/bam/plot/plot_bam.rs b/src/bam/plot/plot_bam.rs index 69fb108a..a2c2f930 100644 --- a/src/bam/plot/plot_bam.rs +++ b/src/bam/plot/plot_bam.rs @@ -34,8 +34,6 @@ pub(crate) fn plot_bam( )?; let visualization = manipulate_json(content, start, end, max_rows)?; - dbg!(&visualization); - let mut templates = Tera::default(); templates.add_raw_template("bam_plot.html.tera", include_str!("bam_plot.html.tera"))?; let mut context = Context::new(); From ba4cc892fbb3c025a9912af94aa6574f24a74ee6 Mon Sep 17 00:00:00 2001 From: fxwiegand Date: Wed, 9 Jun 2021 13:24:54 +0200 Subject: [PATCH 05/21] Format --- src/bam/plot/plot_bam.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/bam/plot/plot_bam.rs b/src/bam/plot/plot_bam.rs index a2c2f930..22ac0c20 100644 --- a/src/bam/plot/plot_bam.rs +++ b/src/bam/plot/plot_bam.rs @@ -41,7 +41,10 @@ pub(crate) fn plot_bam( context.insert("time", &local.format("%a %b %e %T %Y").to_string()); context.insert("version", &env!("CARGO_PKG_VERSION")); context.insert("specs", &visualization); - context.insert("bam", &Path::new(bam_path).iter().last().unwrap().to_str().unwrap()); + context.insert( + "bam", + &Path::new(bam_path).iter().last().unwrap().to_str().unwrap(), + ); context.insert("chrom", &chrom); context.insert("start", &start); context.insert("end", &end); From 032d417349ba071b18ed71138080993d3d3e2fc8 Mon Sep 17 00:00:00 2001 From: fxwiegand Date: Tue, 15 Jun 2021 11:17:14 +0200 Subject: [PATCH 06/21] Support for multiple bam files --- src/bam/plot/bam_plot.html.tera | 51 ++++++++++++++++++++++++--------- src/bam/plot/plot_bam.rs | 46 ++++++++++++++++++----------- src/cli.rs | 8 +++--- 3 files changed, 71 insertions(+), 34 deletions(-) diff --git a/src/bam/plot/bam_plot.html.tera b/src/bam/plot/bam_plot.html.tera index 6122970b..4cfd7eab 100644 --- a/src/bam/plot/bam_plot.html.tera +++ b/src/bam/plot/bam_plot.html.tera @@ -1,7 +1,7 @@ - BAM Plot for {{ bam }} + rbt bam report @@ -34,27 +34,50 @@ -
+
+ +
+
+
diff --git a/src/bam/plot/plot_bam.rs b/src/bam/plot/plot_bam.rs index 22ac0c20..d8e568a3 100644 --- a/src/bam/plot/plot_bam.rs +++ b/src/bam/plot/plot_bam.rs @@ -10,7 +10,7 @@ use std::str::FromStr; use tera::{Context, Tera}; pub(crate) fn plot_bam( - bam_path: &str, + bam_paths: &Vec, fasta_path: &str, region: &str, max_read_depth: u32, @@ -22,17 +22,34 @@ pub(crate) fn plot_bam( assert_eq!(2, span.len()); let start = u64::from_str(span[0])?; let end = u64::from_str(span[1])?; + let mut plots = Vec::new(); - let (content, max_rows) = create_report_data( - Path::new(fasta_path), - None, - Path::new(bam_path), - chrom.to_owned(), - start, - end, - max_read_depth, - )?; - let visualization = manipulate_json(content, start, end, max_rows)?; + for bam_path in bam_paths { + let (content, max_rows) = create_report_data( + Path::new(fasta_path), + None, + Path::new(bam_path), + chrom.to_owned(), + start, + end, + max_read_depth, + )?; + let visualization = manipulate_json(content, start, end, max_rows)?; + + plots.push(visualization); + } + + let bams = bam_paths + .iter() + .map(|b| { + Path::new(b) + .iter() + .last() + .unwrap() + .to_str() + .unwrap() + }) + .collect_vec(); let mut templates = Tera::default(); templates.add_raw_template("bam_plot.html.tera", include_str!("bam_plot.html.tera"))?; @@ -40,11 +57,8 @@ pub(crate) fn plot_bam( let local: DateTime = Local::now(); context.insert("time", &local.format("%a %b %e %T %Y").to_string()); context.insert("version", &env!("CARGO_PKG_VERSION")); - context.insert("specs", &visualization); - context.insert( - "bam", - &Path::new(bam_path).iter().last().unwrap().to_str().unwrap(), - ); + context.insert("plots", &plots); + context.insert("bams", &bams); context.insert("chrom", &chrom); context.insert("start", &start); context.insert("end", &end); diff --git a/src/cli.rs b/src/cli.rs index 91cc6e2b..e7d09f14 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -217,19 +217,19 @@ pub(crate) enum Command { /// Creates a html file with a vega visualization of the given bam region /// Example: - /// rbt plot-bam input.bam 2:132424-132924 + /// rbt plot-bam -b input.bam -g 2:132424-132924 -r input.fa #[structopt(author = "Felix Wiegand ")] PlotBam { /// BAM file to be visualized. - #[structopt()] - bam_path: String, + #[structopt(long, short = "b")] + bam_path: Vec, /// Path to the reference fasta file. #[structopt(long, short = "r", default_value = "100")] reference: String, /// Chromosome and region for the visualization. Example: 2:132424-132924 - #[structopt()] + #[structopt(long, short = "g")] region: String, /// Set the maximum lines of reads that will be shown in the alignment plots. Default value is 500. From 582b106687d464e258e08c94001455a396dc85f1 Mon Sep 17 00:00:00 2001 From: fxwiegand Date: Tue, 15 Jun 2021 11:31:24 +0200 Subject: [PATCH 07/21] Format --- src/bam/plot/plot_bam.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/bam/plot/plot_bam.rs b/src/bam/plot/plot_bam.rs index d8e568a3..2ad044c2 100644 --- a/src/bam/plot/plot_bam.rs +++ b/src/bam/plot/plot_bam.rs @@ -41,14 +41,7 @@ pub(crate) fn plot_bam( let bams = bam_paths .iter() - .map(|b| { - Path::new(b) - .iter() - .last() - .unwrap() - .to_str() - .unwrap() - }) + .map(|b| Path::new(b).iter().last().unwrap().to_str().unwrap()) .collect_vec(); let mut templates = Tera::default(); From bd4fec5e87ac61f67b83bf674e03f5fb828f5438 Mon Sep 17 00:00:00 2001 From: Felix Wiegand Date: Wed, 16 Jun 2021 16:12:16 +0200 Subject: [PATCH 08/21] Plot BAM files below each other --- src/bam/plot/bam_plot.html.tera | 49 ++++++++++++++------------------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/src/bam/plot/bam_plot.html.tera b/src/bam/plot/bam_plot.html.tera index 4cfd7eab..36ce4539 100644 --- a/src/bam/plot/bam_plot.html.tera +++ b/src/bam/plot/bam_plot.html.tera @@ -38,46 +38,37 @@ -
- -
-
-
-
+{% for plot in plots %}
+
+
{% endfor %} From 57d5c0608078ea07e2b8b5516c71525213dd6c2e Mon Sep 17 00:00:00 2001 From: Felix Wiegand Date: Thu, 17 Jun 2021 10:23:58 +0200 Subject: [PATCH 09/21] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4a4b0b18..ecec6c26 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Rust-Bio-Tools provides a command `rbt`, which currently supports the following * a tool to generate interactive HTML based reports that offer multiple plots visualizing the provided genomics data in VCF and BAM format (`rbt vcf-report`) * a tool to generate an interactive HTML based report from a csv file including visualizations (`rbt csv-report`) * a tool for splitting VCF/BCF files into N equal chunks, including BND support (`rbt vcf-split`) +* a tool to generate visualizations for a specific region of one or multiple BAM files with a given reference contained in a single HTML file (`rbt plot-bam`) Further functionality is added as it is needed by the authors. Check out the [Contributing](#Contributing) section if you want contribute anything yourself. For a list of changes, take a look at the [CHANGELOG](CHANGELOG.md). From 806d3ca5d951ad699acf0f8a79406ab92d6ebd12 Mon Sep 17 00:00:00 2001 From: Felix Wiegand Date: Thu, 17 Jun 2021 10:46:17 +0200 Subject: [PATCH 10/21] Minor clippy fixes --- src/bam/plot/plot_bam.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bam/plot/plot_bam.rs b/src/bam/plot/plot_bam.rs index 2ad044c2..413aaef6 100644 --- a/src/bam/plot/plot_bam.rs +++ b/src/bam/plot/plot_bam.rs @@ -10,15 +10,15 @@ use std::str::FromStr; use tera::{Context, Tera}; pub(crate) fn plot_bam( - bam_paths: &Vec, + bam_paths: &[String], fasta_path: &str, region: &str, max_read_depth: u32, output_path: &str, ) -> Result<()> { - let splitted_region = region.split(":").collect_vec(); + let splitted_region = region.split(':').collect_vec(); let chrom = splitted_region[0]; - let span = splitted_region[1].split("-").collect_vec(); + let span = splitted_region[1].split('-').collect_vec(); assert_eq!(2, span.len()); let start = u64::from_str(span[0])?; let end = u64::from_str(span[1])?; From 405e9c098ab2e53134a6c813ac20e0e64a220a77 Mon Sep 17 00:00:00 2001 From: fxwiegand Date: Tue, 22 Jun 2021 15:06:36 +0200 Subject: [PATCH 11/21] Write output to stdout --- src/bam/plot/bam_plot.html.tera | 4 ++-- src/bam/plot/plot_bam.rs | 7 ++----- src/cli.rs | 6 +----- src/main.rs | 5 ----- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/bam/plot/bam_plot.html.tera b/src/bam/plot/bam_plot.html.tera index 36ce4539..dc567950 100644 --- a/src/bam/plot/bam_plot.html.tera +++ b/src/bam/plot/bam_plot.html.tera @@ -38,8 +38,8 @@ -{% for plot in plots %}
-
+{% for plot in plots %}
+
{% endfor %}