Skip to content

Commit

Permalink
feat: add -r parameter for subcli tail
Browse files Browse the repository at this point in the history
  • Loading branch information
sharkLoc committed Apr 19, 2024
1 parent ea7dc3a commit d995dbb
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 13 deletions.
30 changes: 20 additions & 10 deletions src/cli/tail.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::time::Instant;
pub fn tail_n_records(
input: Option<&String>,
number: usize,
rdc: bool,
output: Option<&String>,
compression_level: u32,
) -> Result<()> {
Expand All @@ -21,17 +22,26 @@ pub fn tail_n_records(
info!("get tail {} records", number);

let mut fo = fastq::Writer::new(file_writer(output, compression_level)?);
let mut total = 0usize;
if rdc {
let mut total = 0usize;
for _ in fp.records() {
total += 1;
}
info!("fastq file total reads number: {}", total);

for _ in fp.records() {
total += 1;
}
info!("fastq file total reads number: {}", total);
let skip_n = total - number;

let fp2 = fastq::Reader::new(file_reader(input)?);
for rec in fp2.records().skip(skip_n).flatten() {
fo.write_record(&rec)?;
let skip_n = total - number;
let fp2 = fastq::Reader::new(file_reader(input)?);
for rec in fp2.records().skip(skip_n).flatten() {
fo.write_record(&rec)?;
}
} else {
let mut tail = vec![];
for rec in fp.records().map_while(Result::ok) {
tail.push(rec);
}
for rec in tail.iter().rev().take(number).rev(){
fo.write_record(rec)?;
}
}
fo.flush()?;

Expand Down
6 changes: 5 additions & 1 deletion src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,16 @@ pub enum Subcli {
out: Option<String>,
},
/// get last N records from fastq file
#[command(before_help = "note: if the -r parameter is not specified, all data will be read into memory")]
tail {
/// input fastq file, or read from stdin
input: Option<String>,
/// print last N fastq records
#[arg(short = 'n', long = "num", default_value_t = 10, value_name = "INT")]
num: usize,
/// read the input file twice to reduce memory usage at the cost of more time; cannot be used with streamed input (stdin)
#[arg(short = 'r', long = "rdc", help_heading = Some("FLAGS"))]
rdc: bool,
/// output fastq file name or write to stdout, files ending in .gz/.bz2/.xz will be compressed automatically
#[arg(short = 'o', long = "out", value_name = "FILE")]
out: Option<String>,
Expand Down Expand Up @@ -478,7 +482,7 @@ pub enum Subcli {
},
/// check the validity of a fastq record
#[command(
before_help = "note: this function will return an Err if one of the following conditions is met:\n
before_help = "note: this function will return an Err if one of the following conditions is met\n
1. the record identifier is empty.
2. there is a non-ASCII character found in either the sequence or quality strings.
3. the sequence and quality strings do not have the same length.\n"
Expand Down
4 changes: 2 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ fn main() -> Result<(), Error> {
Subcli::topn { input, num, out } => {
top_n_records(input.as_ref(), num, out.as_ref(), arg.compression_level)?;
}
Subcli::tail { input, num, out } => {
tail_n_records(input.as_ref(), num, out.as_ref(), arg.compression_level)?;
Subcli::tail { input, num, rdc, out } => {
tail_n_records(input.as_ref(), num, rdc, out.as_ref(), arg.compression_level)?;
}
Subcli::subfq {
input,
Expand Down

0 comments on commit d995dbb

Please sign in to comment.