diff --git a/src/cli/flatten.rs b/src/cli/flatten.rs index a652fa1..400c760 100644 --- a/src/cli/flatten.rs +++ b/src/cli/flatten.rs @@ -9,6 +9,9 @@ pub fn flatten_fq( out: Option<&String>, flag: u8, sep: char, + gap: bool, + len: bool, + gc: bool, compression_level: u32, ) -> Result<(), Error> { let start = Instant::now(); @@ -37,11 +40,30 @@ pub fn flatten_fq( for rec in fq_reader.records().flatten() { let read = vec![rec.id().as_bytes(), rec.seq(), "+".as_bytes(), rec.qual()]; - let res = fields.iter().map(|idx| read[*idx]).collect::>(); + let res = fields + .iter() + .map(|idx| read[*idx]) + .collect::>(); + let mut out = Vec::new(); for x in res { out.push(std::str::from_utf8(x)?.to_string()); } + if gap { + out.push(rec.seq().iter().filter(|x| *x == &b'N').count().to_string()); + } + if len { + out.push(rec.seq().len().to_string()); + } + if gc { + let gc_count = rec + .seq() + .iter() + .filter(|x| *x == &b'G' || *x == &b'C') + .count(); + let gc_ratio = format!("{:.2}",gc_count as f64 / rec.seq().len() as f64 * 100.0); + out.push(gc_ratio); + } out_writer.write_all(out.join(sep.to_string().as_str()).as_bytes())?; out_writer.write_all("\n".as_bytes())?; } diff --git a/src/command.rs b/src/command.rs index 3efe9dd..554b002 100644 --- a/src/command.rs +++ b/src/command.rs @@ -435,6 +435,15 @@ pub enum Subcli { /// output seprater, can be ",", ";", #[arg(short = 's', long = "sep", default_value_t = '\t', value_name = "CHAR")] sep: char, + /// if specified, add N base count in output + #[arg(short = 'n', long = "gap-n", help_heading = Some("FLAGS"))] + gap: bool, + /// if specified, add read length in output + #[arg(short = 'l', long = "length", help_heading = Some("FLAGS"))] + len: bool, + /// if specified, add GC content(%) in output + #[arg(short = 'g', long = "gc-content", help_heading = Some("FLAGS"))] + gc: bool, /// output file name or write to stdout, file ending in .gz/.bz2/.xz will be compressed automatically #[arg(short = 'o', long = "out", value_name = "FILE")] out: Option, diff --git a/src/main.rs b/src/main.rs index b20433d..b53d253 100644 --- a/src/main.rs +++ b/src/main.rs @@ -170,6 +170,9 @@ fn main() -> Result<(), Error> { input, flag, sep, + gap, + len, + gc, out, } => { flatten_fq( @@ -177,6 +180,9 @@ fn main() -> Result<(), Error> { out.as_ref(), flag, sep, + gap, + len, + gc, arg.compression_level, )?; }