Skip to content

Commit

Permalink
make total kept bases calculation more accurate
Browse files Browse the repository at this point in the history
  • Loading branch information
mbhall88 committed Jul 29, 2021
1 parent aa5732b commit b72405e
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 22 deletions.
8 changes: 5 additions & 3 deletions src/fastx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ impl Fastx {
&self,
mut reads_to_keep: HashSet<u32>,
write_to: &mut T,
) -> Result<(), FastxError> {
) -> Result<usize, FastxError> {
let mut total_len = 0;
let mut reader =
parse_fastx_file(&self.path).map_err(|source| FastxError::ReadError { source })?;
let mut read_idx: u32 = 0;
Expand All @@ -162,6 +163,7 @@ impl Fastx {
match record {
Err(source) => return Err(FastxError::ParseError { source }),
Ok(rec) if keep_this_read => {
total_len += rec.num_bases();
rec.write(write_to, None)
.map_err(|err| FastxError::WriteError {
source: anyhow::Error::from(err),
Expand All @@ -173,11 +175,11 @@ impl Fastx {
reads_to_keep.remove(&(read_idx - 1)); // remember we already incremented the index

if reads_to_keep.is_empty() {
return Ok(());
return Ok(total_len);
}
}
if reads_to_keep.is_empty() {
Ok(())
Ok(total_len)
} else {
Err(FastxError::IndicesNotFound)
}
Expand Down
24 changes: 5 additions & 19 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::io::stdout;

use anyhow::{Context, Result};
use fern::colors::{Color, ColoredLevelConfig};
use log::{debug, error, info, warn};
use log::{debug, error, info};
use structopt::StructOpt;

pub use crate::cli::Cli;
Expand Down Expand Up @@ -120,14 +120,8 @@ fn main() -> Result<()> {
}
debug!("Indices of reads being kept:\n{:?}", reads_to_keep);

if let Err(err) = input_fastx.filter_reads_into(reads_to_keep.clone(), &mut output_handle) {
warn!("{:?}", err);
}

let mut total_kept_bases: u64 = reads_to_keep
.iter()
.map(|&i| read_lengths[i as usize])
.fold(0, |acc, x| acc + u64::from(x));
let mut total_kept_bases =
input_fastx.filter_reads_into(reads_to_keep.clone(), &mut output_handle)? as u64;

// repeat the same process for the second input fastx (if illumina)
if is_illumina {
Expand All @@ -137,16 +131,8 @@ fn main() -> Result<()> {
.create()
.context("unable to create the second output file")?;

total_kept_bases += reads_to_keep
.iter()
.map(|&i| read_lengths[i as usize])
.fold(0, |acc, x| acc + u64::from(x));

if let Err(err) =
second_input_fastx.filter_reads_into(reads_to_keep, &mut second_output_handle)
{
warn!("{:?}", err);
}
total_kept_bases +=
second_input_fastx.filter_reads_into(reads_to_keep, &mut second_output_handle)? as u64;
}

let actual_covg = total_kept_bases / args.genome_size;
Expand Down

0 comments on commit b72405e

Please sign in to comment.