Skip to content

Commit

Permalink
feat: New subcommand for plotting bam files (#156)
Browse files Browse the repository at this point in the history
* Add new subcommand plot-bam

* Minor fixes

* Visual updates

* Remove debug

* Format

* Support for multiple bam files

* Format

* Plot BAM files below each other

* Update README.md

* Minor clippy fixes

* Write output to stdout

* Format

* Add navigation for samples

* Bump xlsxwriter version

* Update Cargo.toml

* Update Cargo.toml

* Update Cargo.toml

* Fix cli help text and wrong default value

* pack chrom, start, stop into Region struct instead; replace Path arg types with P: AsRef<Path>

* Use context for anyhow error message

* Update README.md

Co-authored-by: Till Hartmann <till.hartmann@udo.edu>
Co-authored-by: Johannes Köster <johannes.koester@tu-dortmund.de>
  • Loading branch information
3 people committed Jul 7, 2021
1 parent 5a91142 commit 707ef77
Show file tree
Hide file tree
Showing 12 changed files with 274 additions and 63 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Rust-Bio-Tools provides a command `rbt`, which currently supports the following
* a tool to generate interactive HTML based reports that offer multiple plots visualizing the provided genomics data in VCF and BAM format (`rbt vcf-report`)
* a tool to generate an interactive HTML based report from a csv file including visualizations (`rbt csv-report`)
* a tool for splitting VCF/BCF files into N equal chunks, including BND support (`rbt vcf-split`)
* a tool to generate visualizations for a specific region of one or multiple BAM files with a given reference contained in a single HTML file (`rbt plot-bam`)

Further functionality is added as it is needed by the authors. Check out the [Contributing](#Contributing) section if you want contribute anything yourself.
For a list of changes, take a look at the [CHANGELOG](CHANGELOG.md).
Expand Down Expand Up @@ -63,6 +64,7 @@ This should format, check and lint your code when committing.

## Authors

* Johannes Köster (https://koesterlab.github.io)
* Felix Mölder
* Henning Timm
* [Johannes Köster](https://github.com/johanneskoester) (https://koesterlab.github.io)
* [Felix Mölder](https://github.com/FelixMoelder)
* [Henning Timm](https://github.com/HenningTimm)
* [Felix Wiegand](https://github.com/fxwiegand)
1 change: 1 addition & 0 deletions src/bam/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//! Tools that work on BAM files
pub mod collapse_reads_to_fragments;
pub mod depth;
pub mod plot;
86 changes: 86 additions & 0 deletions src/bam/plot/bam_plot.html.tera
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
<!doctype html>
<html lang="en">
<head>
<title>rbt bam report</title>
<meta charset="UTF-8">
<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/lz-string/1.4.4/lz-string.min.js" integrity="sha512-qoCTmFwBtCPvFhA+WAqatSOrghwpDhFHxwAGh+cppWonXbHA09nG1z5zi4/NGnp8dUhXiVrzA6EnKgJA+fyrpw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://unpkg.com/jsonm@1.0.10/build/jsonm.js"></script>
<!-- CSS only -->
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
</head>

<body style="text-align: center;">
<nav class="navbar navbar-expand-lg navbar-dark bg-dark">
<a class="navbar-brand" href="#">rbt report</a>
<div class="collapse navbar-collapse" id="navbarText">
<ul class="navbar-nav mr-auto">
<li class="nav-item">
<a class="nav-link" href="https://github.com/rust-bio/rust-bio-tools/blob/master/CHANGELOG.md">{{ version }}</a>
</li>
<li class="nav-item">
<a class="nav-link" href="https://github.com/rust-bio/rust-bio-tools">github</a>
</li>
</ul>
<span class="navbar-text">
created {{ time }}
</span>
</div>
</nav>
<nav aria-label="breadcrumb">
<ul class="breadcrumb">
<li class="breadcrumb-item active" style="padding-top: 8px;">chrom: {{ chrom|safe }}</li>
<li class="breadcrumb-item active" style="padding-top: 8px;" aria-current="page">region: {{ start }} - {{ end }}</li>
<button class="btn btn-secondary dropdown-toggle ml-auto" type="button" id="dropdownMenuButton" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Samples
</button>
<div class="dropdown-menu" aria-labelledby="dropdownMenuButton" style="overflow-y: auto; max-height: 200px; z-index: 1001;">
{% for bam in bams %}<div class="dropdown-item">
<input class="form-check-input" type="checkbox" checked="checked" data-toggle="collapse" data-target="#collapse_{{loop.index}}" value="" id="defaultCheck{{ loop.index }}">
<label class="form-check-label" for="defaultCheck{{ loop.index }}">
{{ bam }}
</label>
</div>{% endfor %}
</div>
</ul>
</nav>

{% for plot in plots %}<div id="collapse_{{loop.index}}" class="row justify-content-center collapse show" style="margin-top: 25px;">
<div id="plot_{{loop.index}}" style="overflow-y: auto; height: calc(50vh - 50px);"></div>
</div>{% endfor %}
<script>
let plots = [{% for plot in plots %}{{ plot | safe }}{% if not loop.last %},{% endif %}{% endfor %}];
let bams = [{% for bam in bams %}"{{ bam }}"{% if not loop.last %},{% endif %}{% endfor %}];
let i = 1;

let decompressed_plots = [];

for (plot of plots) {
let decompressed = LZString.decompressFromUTF16(plot);
let unpacker = new jsonm.Unpacker();
unpacker.setMaxDictSize(100000);
let unpacked_specs = unpacker.unpack(JSON.parse(decompressed));
unpacked_specs["width"] = $(window).width() - 100;
unpacked_specs["title"] = bams[i - 1];
decompressed_plots.push(unpacked_specs);
let plot_id = `#plot_${i}`;
vegaEmbed(plot_id, unpacked_specs);
i++;
}

$(window).resize(function() {
let j = 1;
for (plot of decompressed_plots) {
let plot_id = `#plot_${j}`;
plot["width"] = $(window).width() - 100;
vegaEmbed(plot_id, plot);
}
})
</script>
</body>
</html>
1 change: 1 addition & 0 deletions src/bam/plot/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod plot_bam;
50 changes: 50 additions & 0 deletions src/bam/plot/plot_bam.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
use crate::bcf::report::table_report::create_report_table::create_report_data;
use crate::bcf::report::table_report::create_report_table::manipulate_json;
use crate::common::Region;
use anyhow::Result;
use chrono::{DateTime, Local};
use itertools::Itertools;
use std::io;
use std::io::Write;
use std::path::Path;
use tera::{Context, Tera};

pub(crate) fn plot_bam<P: AsRef<Path>>(
bam_paths: &[P],
fasta_path: P,
region: &Region,
max_read_depth: u32,
) -> Result<()> {
let mut plots = Vec::new();

let Region { target, start, end } = region.clone();
for bam_path in bam_paths {
let (content, max_rows) =
create_report_data(&fasta_path, None, bam_path, region, max_read_depth)?;
let visualization = manipulate_json(content, start, end, max_rows)?;

plots.push(visualization);
}

let bams = bam_paths
.iter()
.map(|b| b.as_ref().iter().last().unwrap().to_str().unwrap())
.collect_vec();

let mut templates = Tera::default();
templates.add_raw_template("bam_plot.html.tera", include_str!("bam_plot.html.tera"))?;
let mut context = Context::new();
let local: DateTime<Local> = Local::now();
context.insert("time", &local.format("%a %b %e %T %Y").to_string());
context.insert("version", &env!("CARGO_PKG_VERSION"));
context.insert("plots", &plots);
context.insert("bams", &bams);
context.insert("chrom", &target);
context.insert("start", &start);
context.insert("end", &end);

let html = templates.render("bam_plot.html.tera", &context)?;
io::stdout().write_all(html.as_bytes())?;

Ok(())
}
16 changes: 9 additions & 7 deletions src/bcf/report/table_report/alignment_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ extern crate rust_htslib;

use self::rust_htslib::bam::FetchDefinition;
use crate::bcf::report::table_report::fasta_reader::read_fasta;
use crate::common::Region;
use anyhow::Result;
use rust_htslib::bam::record::CigarStringView;
use rust_htslib::{bam, bam::Read};
Expand Down Expand Up @@ -73,8 +74,10 @@ pub fn decode_flags(code: u16) -> Vec<u16> {
read_map
}

pub fn read_indexed_bam(path: &Path, chrom: String, from: u64, to: u64) -> Result<Vec<Alignment>> {
pub fn read_indexed_bam<P: AsRef<Path>>(path: P, region: &Region) -> Result<Vec<Alignment>> {
let mut bam = bam::IndexedReader::from_path(&path)?;
let chrom = &region.target;
let (from, to) = (region.start, region.end);
let tid = bam.header().tid(chrom.as_bytes()).unwrap() as i32;

let mut alignments: Vec<Alignment> = Vec::new();
Expand Down Expand Up @@ -133,17 +136,16 @@ fn make_alignment(record: &bam::Record) -> Alignment {
}
}

pub fn make_nucleobases(
fasta_path: &Path,
chrom: String,
pub fn make_nucleobases<P: AsRef<Path>>(
fasta_path: P,
region: &Region,
snippets: Vec<Alignment>,
from: u64,
to: u64,
) -> Result<(Vec<AlignmentNucleobase>, Vec<AlignmentMatch>)> {
let mut bases: Vec<AlignmentNucleobase> = Vec::new();
let mut matches: Vec<AlignmentMatch> = Vec::new();

let ref_bases = read_fasta(fasta_path, chrom, from, to, false)?;
let ref_bases = read_fasta(fasta_path, region, false)?;
let (from, to) = (region.start, region.end);

for snippet in snippets {
let mut cigar_offset: i64 = 0;
Expand Down
67 changes: 36 additions & 31 deletions src/bcf/report/table_report/create_report_table.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::bcf::report::table_report::fasta_reader::{get_fasta_lengths, read_fasta};
use crate::bcf::report::table_report::static_reader::{get_static_reads, Variant};
use crate::common::Region;
use anyhow::Context as AnyhowContext;
use anyhow::Result;
use chrono::{DateTime, Local};
Expand Down Expand Up @@ -206,9 +207,9 @@ pub(crate) fn make_table_report(
.get(&field.to_owned())
.context({
format!(
"No field named {} found. Please only use VEP-annotated VCF-files.",
field
)
"No field named {} found. Please only use VEP-annotated VCF-files.",
field
)
})
.unwrap()],
)
Expand All @@ -222,7 +223,7 @@ pub(crate) fn make_table_report(
warn!("Warning! Found allele without SYMBOL or Gene field in record at {}:{}. Using HGVSg instead.", &chrom, variant.pos());
get_field("HGVSg")?
} else {
warn!("Warning! Found allele without SYMBOL, Gene or HGVSg field in record at {}:{}. This record will be skipped!", &chrom, variant.pos());
warn!("Warning! Found allele without SYMBOL, Gene or HGVSg field in record at {}:{}. This record will be skipped!", &chrom, variant.pos());
continue;
};
genes.push(gene.to_owned());
Expand Down Expand Up @@ -325,35 +326,41 @@ pub(crate) fn make_table_report(
if pos < 75 {
let (content, max_rows) = create_report_data(
fasta_path,
var.clone(),
Some(var.clone()),
bam_path,
chrom.clone(),
0,
end_position as u64 + 75,
&Region {
target: chrom.clone(),
start: 0,
end: end_position as u64 + 75,
},
max_read_depth,
)?;
visualization =
manipulate_json(content, 0, end_position as u64 + 75, max_rows)?;
} else if pos + 75 >= fasta_length as i64 {
let (content, max_rows) = create_report_data(
fasta_path,
var.clone(),
Some(var.clone()),
bam_path,
chrom.clone(),
pos as u64 - 75,
fasta_length - 1,
&Region {
target: chrom.clone(),
start: pos as u64 - 75,
end: fasta_length - 1,
},
max_read_depth,
)?;
visualization =
manipulate_json(content, pos as u64 - 75, fasta_length - 1, max_rows)?;
} else {
let (content, max_rows) = create_report_data(
fasta_path,
var.clone(),
Some(var.clone()),
bam_path,
chrom.clone(),
pos as u64 - 75,
end_position as u64 + 75,
&Region {
target: chrom.clone(),
start: pos as u64 - 75,
end: end_position as u64 + 75,
},
max_read_depth,
)?;
visualization = manipulate_json(
Expand Down Expand Up @@ -490,30 +497,26 @@ pub(crate) fn read_tag_entries(
Ok(())
}

fn create_report_data(
fasta_path: &Path,
variant: Variant,
bam_path: &Path,
chrom: String,
from: u64,
to: u64,
pub(crate) fn create_report_data<P: AsRef<Path>>(
fasta_path: P,
variant: Option<Variant>,
bam_path: P,
region: &Region,
max_read_depth: u32,
) -> Result<(Json, usize)> {
let mut data = Vec::new();

for f in read_fasta(fasta_path, chrom.clone(), from, to, true)? {
for f in read_fasta(&fasta_path, region, true)? {
let nucleobase = json!(f);
data.push(nucleobase);
}

let (bases, matches, max_rows) = get_static_reads(
bam_path,
fasta_path,
chrom,
from,
to,
region,
max_read_depth,
&variant,
variant.as_ref(),
)?;

for b in bases {
Expand All @@ -526,14 +529,16 @@ fn create_report_data(
data.push(mat);
}

data.push(json!(variant));
if variant.is_some() {
data.push(json!(variant));
}

Ok((Json::from_str(&json!(data).to_string()).unwrap(), max_rows))
}

/// Inserts the json containing the genome data into the vega specs.
/// It also changes keys and values of the json data for the vega plot to look better and compresses the json with jsonm.
fn manipulate_json(data: Json, from: u64, to: u64, max_rows: usize) -> Result<String> {
pub(crate) fn manipulate_json(data: Json, from: u64, to: u64, max_rows: usize) -> Result<String> {
let json_string = include_str!("vegaSpecs.json");

let mut vega_specs: Value = serde_json::from_str(json_string)?;
Expand All @@ -545,7 +550,7 @@ fn manipulate_json(data: Json, from: u64, to: u64, max_rows: usize) -> Result<St
for (i, _) in v.iter().enumerate() {
let k = v[i]["marker_type"].clone().as_str().unwrap().to_owned();

if k == "A" || k == "T" || k == "G" || k == "C" || k == "U" {
if k == "A" || k == "T" || k == "G" || k == "C" || k == "U" || k == "N" {
values["values"][i]["base"] = values["values"][i]["marker_type"].clone();
} else if k == "Deletion"
|| k == "Match"
Expand Down
13 changes: 6 additions & 7 deletions src/bcf/report/table_report/fasta_reader.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
use crate::common::Region;
use anyhow::Context;
use anyhow::Result;
use bio::io::fasta;
use serde::Serialize;
use std::collections::HashMap;
use std::path::Path;

pub fn read_fasta(
path: &Path,
chrom: String,
start: u64,
stop: u64,
pub fn read_fasta<P: AsRef<Path>>(
path: P,
region: &Region,
compensate_0_basing: bool,
) -> Result<Vec<Nucleobase>> {
let mut reader = fasta::IndexedReader::from_file(&path).unwrap();
Expand All @@ -18,11 +17,11 @@ pub fn read_fasta(

let mut seq: Vec<u8> = Vec::new();

reader.fetch(&chrom, start, stop)?;
reader.fetch(&region.target, region.start, region.end)?;
reader.read(&mut seq)?;

let mut fasta = Vec::new();
let mut ind = start;
let mut ind = region.start;
if compensate_0_basing {
ind += 1;
}
Expand Down
Loading

0 comments on commit 707ef77

Please sign in to comment.