Skip to content

Commit

Permalink
feat: add gc content, read length and N base count
Browse files Browse the repository at this point in the history
in output for subcli flat
  • Loading branch information
sharkLoc committed Apr 19, 2024
1 parent d995dbb commit eb8bca8
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 1 deletion.
24 changes: 23 additions & 1 deletion src/cli/flatten.rs
Expand Up @@ -9,6 +9,9 @@ pub fn flatten_fq(
out: Option<&String>,
flag: u8,
sep: char,
gap: bool,
len: bool,
gc: bool,
compression_level: u32,
) -> Result<(), Error> {
let start = Instant::now();
Expand Down Expand Up @@ -37,11 +40,30 @@ pub fn flatten_fq(

for rec in fq_reader.records().flatten() {
let read = vec![rec.id().as_bytes(), rec.seq(), "+".as_bytes(), rec.qual()];
let res = fields.iter().map(|idx| read[*idx]).collect::<Vec<&[u8]>>();
let res = fields
.iter()
.map(|idx| read[*idx])
.collect::<Vec<&[u8]>>();

let mut out = Vec::new();
for x in res {
out.push(std::str::from_utf8(x)?.to_string());
}
if gap {
out.push(rec.seq().iter().filter(|x| *x == &b'N').count().to_string());
}
if len {
out.push(rec.seq().len().to_string());
}
if gc {
let gc_count = rec
.seq()
.iter()
.filter(|x| *x == &b'G' || *x == &b'C')
.count();
let gc_ratio = format!("{:.2}",gc_count as f64 / rec.seq().len() as f64 * 100.0);
out.push(gc_ratio);
}
out_writer.write_all(out.join(sep.to_string().as_str()).as_bytes())?;
out_writer.write_all("\n".as_bytes())?;
}
Expand Down
9 changes: 9 additions & 0 deletions src/command.rs
Expand Up @@ -435,6 +435,15 @@ pub enum Subcli {
/// output separator, can be ",", ";",
#[arg(short = 's', long = "sep", default_value_t = '\t', value_name = "CHAR")]
sep: char,
/// if specified, add N base count in output
#[arg(short = 'n', long = "gap-n", help_heading = Some("FLAGS"))]
gap: bool,
/// if specified, add read length in output
#[arg(short = 'l', long = "length", help_heading = Some("FLAGS"))]
len: bool,
/// if specified, add GC content(%) in output
#[arg(short = 'g', long = "gc-content", help_heading = Some("FLAGS"))]
gc: bool,
/// output file name or write to stdout, file ending in .gz/.bz2/.xz will be compressed automatically
#[arg(short = 'o', long = "out", value_name = "FILE")]
out: Option<String>,
Expand Down
6 changes: 6 additions & 0 deletions src/main.rs
Expand Up @@ -170,13 +170,19 @@ fn main() -> Result<(), Error> {
input,
flag,
sep,
gap,
len,
gc,
out,
} => {
flatten_fq(
input.as_ref(),
out.as_ref(),
flag,
sep,
gap,
len,
gc,
arg.compression_level,
)?;
}
Expand Down

0 comments on commit eb8bca8

Please sign in to comment.