Skip to content

Commit

Permalink
Update slice.rs
Browse files Browse the repository at this point in the history
  • Loading branch information
jqnatividad committed Apr 7, 2024
1 parent 5f38982 commit dec19eb
Showing 1 changed file with 129 additions and 18 deletions.
147 changes: 129 additions & 18 deletions src/cmd/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ slice options:
of --end).
-i, --index <arg> Slice a single record (shortcut for -s N -l 1).
If negative, starts from the last record.
--json Output the result as JSON.
Common options:
-h, --help Display this message
Expand All @@ -36,70 +37,180 @@ Common options:
Must be a single character. (default: ,)
"#;

use std::fs;
use std::{fs, io, io::Write, sync::OnceLock};

use serde::Deserialize;

use crate::{
config,
config::{Config, Delimiter},
index::Indexed,
util, CliResult,
};

static NULL_VAL: OnceLock<String> = OnceLock::new();

#[derive(Deserialize)]
struct Args {
arg_input: Option<String>,
flag_start: Option<isize>,
flag_end: Option<usize>,
flag_len: Option<usize>,
flag_index: Option<isize>,
flag_json: bool,
flag_output: Option<String>,
flag_no_headers: bool,
flag_delimiter: Option<Delimiter>,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
let args: Args = util::get_args(USAGE, argv)?;

// set this once, as this is used repeatedly in a hot loop
NULL_VAL.set("null".to_string()).unwrap();

match args.rconfig().indexed()? {
None => args.no_index(),
Some(idxed) => args.with_index(idxed),
}
}

impl Args {
fn create_json_writer(&self) -> io::Result<Box<dyn Write + Send + 'static>> {
// create a JSON writer
// if flag_output is None or "-" then write to stdout
let output = self.flag_output.as_ref().map_or("-", |s| s.as_str());
let writer: Box<dyn Write + Send + 'static> = match output {
"-" => Box::new(io::BufWriter::with_capacity(
config::DEFAULT_WTR_BUFFER_CAPACITY,
io::stdout(),
)),
_ => Box::new(io::BufWriter::with_capacity(
config::DEFAULT_WTR_BUFFER_CAPACITY,
fs::File::create(output)?,
)),
};
Ok(writer)
}

fn write_json(
&self,
headers: &csv::ByteRecord,
records: impl Iterator<Item = csv::ByteRecord>,
) -> CliResult<()> {
let mut json_wtr = self.create_json_writer()?;
let header_vec: Vec<String> = headers
.iter()
.map(|b| String::from_utf8_lossy(b).to_string())
.collect();

// Write the opening bracket for the JSON array
json_wtr.write(b"[")?;

Check failure

Code scanning / clippy

written amount is not handled Error

written amount is not handled
let mut is_first = true;

let rec_len = header_vec.len().saturating_sub(1);
let mut temp_val;
let mut json_string_val: serde_json::Value;
for record in records {
if !is_first {
// Write a comma before each record except the first one
json_wtr.write(b",")?;

Check failure

Code scanning / clippy

written amount is not handled Error

written amount is not handled
}
write!(json_wtr, "{{")?;
for (idx, b) in record.iter().enumerate() {
if let Ok(val) = simdutf8::basic::from_utf8(b) {
temp_val = val.to_owned();
} else {
temp_val = String::from_utf8_lossy(b).to_string();
}
if temp_val.is_empty() {
temp_val.clone_from(NULL_VAL.get().unwrap());
} else {
// we round-trip the value to serde_json::Value
// to escape the string properly per JSON spec
json_string_val = serde_json::Value::String(temp_val);
temp_val = json_string_val.to_string();
}
// safety: idx is always in bounds
// so we can get_unchecked here
if idx < rec_len {
unsafe {
write!(
&mut json_wtr,
"\"{key}\":{value},",
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
}
} else {
unsafe {
write!(
&mut json_wtr,
"\"{key}\":{value}",
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
}
}
}
write!(json_wtr, "}}")?;
is_first = false;
}
writeln!(json_wtr, "]")?;
Ok(json_wtr.flush()?)
}

fn no_index(&self) -> CliResult<()> {
let mut rdr = self.rconfig().reader()?;
let mut wtr = self.wconfig().writer()?;
self.rconfig().write_headers(&mut rdr, &mut wtr)?;

let (start, end) = self.range()?;
for r in rdr.byte_records().skip(start).take(end - start) {
wtr.write_byte_record(&r?)?;
if self.flag_json {
let headers = rdr.byte_headers()?.clone();
let records = rdr
.byte_records()
.skip(start)
.take(end - start)
.map(|r| r.unwrap());
self.write_json(&headers, records)
} else {
let mut wtr = self.wconfig().writer()?;
self.rconfig().write_headers(&mut rdr, &mut wtr)?;
for r in rdr.byte_records().skip(start).take(end - start) {
wtr.write_byte_record(&r?)?;
}
Ok(wtr.flush()?)
}
Ok(wtr.flush()?)
}

fn with_index(&self, mut idx: Indexed<fs::File, fs::File>) -> CliResult<()> {
let mut wtr = self.wconfig().writer()?;
self.rconfig().write_headers(&mut *idx, &mut wtr)?;

fn with_index(&self, mut indexed_file: Indexed<fs::File, fs::File>) -> CliResult<()> {
let (start, end) = self.range()?;
if end - start == 0 {
return Ok(());
}
idx.seek(start as u64)?;
for r in idx.byte_records().take(end - start) {
wtr.write_byte_record(&r?)?;
indexed_file.seek(start as u64)?;
if self.flag_json {
let headers = indexed_file.byte_headers()?.clone();
let records = indexed_file
.byte_records()
.take(end - start)
.map(|r| r.unwrap());
self.write_json(&headers, records)
} else {
let mut wtr = self.wconfig().writer()?;
self.rconfig().write_headers(&mut *indexed_file, &mut wtr)?;
for r in indexed_file.byte_records().take(end - start) {
wtr.write_byte_record(&r?)?;
}
Ok(wtr.flush()?)
}
Ok(wtr.flush()?)
}

fn range(&self) -> Result<(usize, usize), String> {
fn range(&self) -> CliResult<(usize, usize)> {
let mut start = None;
if let Some(start_arg) = self.flag_start {
if start_arg < 0 {
start = Some(
(util::count_rows(&self.rconfig()).unwrap() as usize)
(util::count_rows(&self.rconfig())? as usize)
.abs_diff(start_arg.unsigned_abs()),
);
} else {
Expand All @@ -108,7 +219,7 @@ impl Args {
}
let index = if let Some(flag_index) = self.flag_index {
if flag_index < 0 {
let index = (util::count_rows(&self.rconfig()).unwrap() as usize)
let index = (util::count_rows(&self.rconfig())? as usize)
.abs_diff(flag_index.unsigned_abs());
Some(index)
} else {
Expand All @@ -117,7 +228,7 @@ impl Args {
} else {
None
};
util::range(start, self.flag_end, self.flag_len, index)
Ok(util::range(start, self.flag_end, self.flag_len, index)?)
}

fn rconfig(&self) -> Config {
Expand Down

0 comments on commit dec19eb

Please sign in to comment.