diff --git a/src/cmd/slice.rs b/src/cmd/slice.rs
index 7c41d4fe6..ae3b5c5d2 100644
--- a/src/cmd/slice.rs
+++ b/src/cmd/slice.rs
@@ -25,6 +25,11 @@ slice options:
                            of --end).
     -i, --index <arg>      Slice a single record (shortcut for -s N -l 1).
                            If negative, starts from the last record.
+    --json                 Output the result as a JSON array of objects,
+                           one object per record. Each key is the column
+                           name and each value is the field value; empty
+                           fields are written as null. If --no-headers is
+                           set, the keys are zero-based column indices.
 
 Common options:
     -h, --help             Display this message
@@ -36,11 +41,12 @@ Common options:
                            Must be a single character. (default: ,)
 "#;
 
-use std::fs;
+use std::{fs, io, io::Write};
 
 use serde::Deserialize;
 
 use crate::{
+    config,
     config::{Config, Delimiter},
     index::Indexed,
     util, CliResult,
@@ -53,6 +59,7 @@ struct Args {
     flag_end: Option<usize>,
     flag_len: Option<usize>,
     flag_index: Option<isize>,
+    flag_json: bool,
     flag_output: Option<String>,
     flag_no_headers: bool,
     flag_delimiter: Option<Delimiter>,
@@ -67,39 +74,137 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
 }
 
 impl Args {
+    /// Opens a buffered writer for the JSON output: stdout when `--output` is
+    /// absent or "-", otherwise the named file, which is created or truncated.
+    fn create_json_writer(&self) -> io::Result<Box<dyn Write>> {
+        let writer: Box<dyn Write> = match self.flag_output.as_deref() {
+            None | Some("-") => Box::new(io::BufWriter::with_capacity(
+                config::DEFAULT_WTR_BUFFER_CAPACITY,
+                io::stdout(),
+            )),
+            Some(path) => Box::new(io::BufWriter::with_capacity(
+                config::DEFAULT_WTR_BUFFER_CAPACITY,
+                fs::File::create(path)?,
+            )),
+        };
+        Ok(writer)
+    }
+
+    /// Writes `records` as a JSON array with one object per record.
+    ///
+    /// Keys are the column names from `headers`, or zero-based column indices
+    /// when `--no-headers` is set. Empty fields are written as `null`; every
+    /// other field is written as an escaped JSON string. The first read error
+    /// in `records` stops the output and is returned to the caller.
+    fn write_json(
+        &self,
+        headers: &csv::ByteRecord,
+        records: impl Iterator<Item = csv::Result<csv::ByteRecord>>,
+    ) -> CliResult<()> {
+        let mut json_wtr = self.create_json_writer()?;
+
+        // Render each key once as a quoted, escaped JSON string so that column
+        // names containing quotes or backslashes cannot break the output.
+        let keys: Vec<String> = headers
+            .iter()
+            .enumerate()
+            .map(|(col_idx, name)| {
+                let key = if self.flag_no_headers {
+                    col_idx.to_string()
+                } else {
+                    String::from_utf8_lossy(name).into_owned()
+                };
+                serde_json::Value::String(key).to_string()
+            })
+            .collect();
+
+        write!(json_wtr, "[")?;
+        for (rec_idx, record) in records.enumerate() {
+            let record = record?;
+            if rec_idx > 0 {
+                write!(json_wtr, ",")?;
+            }
+            write!(json_wtr, "{{")?;
+            for (col_idx, field) in record.iter().enumerate() {
+                if col_idx > 0 {
+                    write!(json_wtr, ",")?;
+                }
+                // A ragged record may have more fields than the header row;
+                // fall back to the column index rather than indexing out of
+                // bounds.
+                match keys.get(col_idx) {
+                    Some(key) => write!(json_wtr, "{}:", key)?,
+                    None => write!(json_wtr, "\"{}\":", col_idx)?,
+                }
+                if field.is_empty() {
+                    write!(json_wtr, "null")?;
+                } else {
+                    let text = match simdutf8::basic::from_utf8(field) {
+                        Ok(valid) => valid.to_owned(),
+                        Err(_) => String::from_utf8_lossy(field).into_owned(),
+                    };
+                    // Display for serde_json::Value emits a quoted, escaped
+                    // JSON string.
+                    write!(json_wtr, "{}", serde_json::Value::String(text))?;
+                }
+            }
+            write!(json_wtr, "}}")?;
+        }
+        writeln!(json_wtr, "]")?;
+        Ok(json_wtr.flush()?)
+    }
+
     fn no_index(&self) -> CliResult<()> {
         let mut rdr = self.rconfig().reader()?;
-        let mut wtr = self.wconfig().writer()?;
-        self.rconfig().write_headers(&mut rdr, &mut wtr)?;
 
         let (start, end) = self.range()?;
-        for r in rdr.byte_records().skip(start).take(end - start) {
-            wtr.write_byte_record(&r?)?;
+        if self.flag_json {
+            let headers = rdr.byte_headers()?.clone();
+            let records = rdr.byte_records().skip(start).take(end - start);
+            self.write_json(&headers, records)
+        } else {
+            let mut wtr = self.wconfig().writer()?;
+            self.rconfig().write_headers(&mut rdr, &mut wtr)?;
+            for r in rdr.byte_records().skip(start).take(end - start) {
+                wtr.write_byte_record(&r?)?;
+            }
+            Ok(wtr.flush()?)
         }
-        Ok(wtr.flush()?)
     }
 
-    fn with_index(&self, mut idx: Indexed<fs::File, fs::File>) -> CliResult<()> {
-        let mut wtr = self.wconfig().writer()?;
-        self.rconfig().write_headers(&mut *idx, &mut wtr)?;
-
+    fn with_index(&self, mut indexed_file: Indexed<fs::File, fs::File>) -> CliResult<()> {
         let (start, end) = self.range()?;
-        if end - start == 0 {
-            return Ok(());
-        }
-        idx.seek(start as u64)?;
-        for r in idx.byte_records().take(end - start) {
-            wtr.write_byte_record(&r?)?;
+        let len = end - start;
+        if self.flag_json {
+            // Read the headers before repositioning the reader; only seek when
+            // there is something to read.
+            let headers = indexed_file.byte_headers()?.clone();
+            if len > 0 {
+                indexed_file.seek(start as u64)?;
+            }
+            // An empty slice still produces a valid, empty JSON array.
+            self.write_json(&headers, indexed_file.byte_records().take(len))
+        } else {
+            let mut wtr = self.wconfig().writer()?;
+            self.rconfig().write_headers(&mut *indexed_file, &mut wtr)?;
+            // An empty slice still emits the header row, as it did before the
+            // JSON option was added; the seek is skipped in that case.
+            if len > 0 {
+                indexed_file.seek(start as u64)?;
+                for r in indexed_file.byte_records().take(len) {
+                    wtr.write_byte_record(&r?)?;
+                }
+            }
+            Ok(wtr.flush()?)
         }
-        Ok(wtr.flush()?)
     }
 
-    fn range(&self) -> Result<(usize, usize), String> {
+    fn range(&self) -> CliResult<(usize, usize)> {
         let mut start = None;
         if let Some(start_arg) = self.flag_start {
             if start_arg < 0 {
                 start = Some(
-                    (util::count_rows(&self.rconfig()).unwrap() as usize)
+                    (util::count_rows(&self.rconfig())? as usize)
                         .abs_diff(start_arg.unsigned_abs()),
                 );
             } else {
@@ -108,7 +213,7 @@ impl Args {
         }
         let index = if let Some(flag_index) = self.flag_index {
             if flag_index < 0 {
-                let index = (util::count_rows(&self.rconfig()).unwrap() as usize)
+                let index = (util::count_rows(&self.rconfig())? as usize)
                     .abs_diff(flag_index.unsigned_abs());
                 Some(index)
             } else {
@@ -117,7 +222,7 @@ impl Args {
         } else {
             None
         };
-        util::range(start, self.flag_end, self.flag_len, index)
+        Ok(util::range(start, self.flag_end, self.flag_len, index)?)
     }
 
     fn rconfig(&self) -> Config {
diff --git a/tests/test_slice.rs b/tests/test_slice.rs
index 5cf57dc46..b8cbbf27e 100644
--- a/tests/test_slice.rs
+++ b/tests/test_slice.rs
@@ -10,49 +10,97 @@ macro_rules! slice_tests {
             #[test]
             fn headers_no_index() {
                 let name = concat!(stringify!($name), "headers_no_index");
-                test_slice(name, $start, $end, $expected, true, false, false);
+                test_slice(name, $start, $end, $expected, true, false, false, false);
+            }
+
+            #[test]
+            fn headers_no_index_json() {
+                let name = concat!(stringify!($name), "headers_no_index_json");
+                test_slice(name, $start, $end, $expected, true, false, false, true);
             }
 
             #[test]
             fn no_headers_no_index() {
                 let name = concat!(stringify!($name), "no_headers_no_index");
-                test_slice(name, $start, $end, $expected, false, false, false);
+                test_slice(name, $start, $end, $expected, false, false, false, false);
+            }
+
+            #[test]
+            fn no_headers_no_index_json() {
+                let name = concat!(stringify!($name), "no_headers_no_index_json");
+                test_slice(name, $start, $end, $expected, false, false, false, true);
             }
 
             #[test]
             fn headers_index() {
                 let name = concat!(stringify!($name), "headers_index");
-                test_slice(name, $start, $end, $expected, true, true, false);
+                test_slice(name, $start, $end, $expected, true, true, false, false);
             }
 
             #[test]
             fn no_headers_index() {
                 let name = concat!(stringify!($name), "no_headers_index");
-                test_slice(name, $start, $end, $expected, false, true, false);
+                test_slice(name, $start, $end, $expected, false, true, false, false);
+            }
+
+            #[test]
+            fn headers_index_json() {
+                let name = concat!(stringify!($name), "headers_index_json");
+                test_slice(name, $start, $end, $expected, true, true, false, true);
+            }
+
+            #[test]
+            fn no_headers_index_json() {
+                let name = concat!(stringify!($name), "no_headers_index_json");
+                test_slice(name, $start, $end, $expected, false, true, false, true);
             }
 
             #[test]
             fn headers_no_index_len() {
                 let name = concat!(stringify!($name), "headers_no_index_len");
-                test_slice(name, $start, $end, $expected, true, false, true);
+                test_slice(name, $start, $end, $expected, true, false, true, false);
             }
 
             #[test]
             fn no_headers_no_index_len() {
                 let name = concat!(stringify!($name), "no_headers_no_index_len");
-                test_slice(name, $start, $end, $expected, false, false, true);
+                test_slice(name, $start, $end, $expected, false, false, true, false);
+            }
+
+            #[test]
+            fn headers_no_index_len_json() {
+                let name = concat!(stringify!($name), "headers_no_index_len_json");
+                test_slice(name, $start, $end, $expected, true, false, true, true);
+            }
+
+            #[test]
+            fn no_headers_no_index_len_json() {
+                let name = concat!(stringify!($name), "no_headers_no_index_len_json");
+                test_slice(name, $start, $end, $expected, false, false, true, true);
             }
 
             #[test]
             fn headers_index_len() {
                 let name = concat!(stringify!($name), "headers_index_len");
-                test_slice(name, $start, $end, $expected, true, true, true);
+                test_slice(name, $start, $end, $expected, true, true, true, false);
             }
 
             #[test]
             fn no_headers_index_len() {
                 let name = concat!(stringify!($name), "no_headers_index_len");
-                test_slice(name, $start, $end, $expected, false, true, true);
+                test_slice(name, $start, $end, $expected, false, true, true, false);
+            }
+
+            #[test]
+            fn headers_index_len_json() {
+                let name = concat!(stringify!($name), "headers_index_len_json");
+                test_slice(name, $start, $end, $expected, true, true, true, true);
+            }
+
+            #[test]
+            fn no_headers_index_len_json() {
+                let name = concat!(stringify!($name), "no_headers_index_len_json");
+                test_slice(name, $start, $end, $expected, false, true, true, true);
             }
         }
     };
@@ -84,6 +132,7 @@ fn test_slice(
     headers: bool,
     use_index: bool,
     as_len: bool,
+    json_output: bool,
 ) {
     let (wrk, mut cmd) = setup(name, headers, use_index);
     if let Some(start) = start {
@@ -105,16 +154,41 @@ fn test_slice(
     if !headers {
         cmd.arg("--no-headers");
     }
+    if json_output {
+        let output_file = wrk.path("output.json").to_string_lossy().to_string();
 
-    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
-    let mut expected = expected
-        .iter()
-        .map(|&s| vec![s.to_owned()])
-        .collect::<Vec<Vec<String>>>();
-    if headers {
-        expected.insert(0, svec!["header"]);
+        cmd.arg("--json").args(&["--output", &output_file]);
+
+        wrk.assert_success(&mut cmd);
+
+        let gots = wrk.read_to_string(&output_file);
+        let gotj: serde_json::Value = serde_json::from_str(&gots).unwrap();
+        let got = gotj.to_string();
+
+        let expected_vec = expected
+            .iter()
+            .map(|&s| {
+                if headers {
+                    format!("{{\"header\":\"{}\"}}", s)
+                } else {
+                    format!("{{\"0\":\"{}\"}}", s)
+                }
+            })
+            .collect::<Vec<String>>();
+        let expected = format!("[{}]", expected_vec.join(","));
+
+        assert_eq!(got, expected);
+    } else {
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let mut expected = expected
+            .iter()
+            .map(|&s| vec![s.to_owned()])
+            .collect::<Vec<Vec<String>>>();
+        if headers {
+            expected.insert(0, svec!["header"]);
+        }
+        assert_eq!(got, expected);
     }
-    assert_eq!(got, expected);
 }
 
 fn test_index(name: &str, idx: isize, expected: &str, headers: bool, use_index: bool) {
@@ -149,6 +223,7 @@ fn slice_negative_with_len() {
         true,
         true,
         true,
+        false,
     );
     test_slice(
         "slice_negative_start_no_headers_index_len",
@@ -158,6 +233,7 @@ fn slice_negative_with_len() {
         false,
         true,
         true,
+        false,
     );
     test_slice(
         "slice_negative_start_headers_no_index_len",
@@ -167,6 +243,41 @@ fn slice_negative_with_len() {
         true,
         false,
         true,
+        false,
+    );
+}
+
+#[test]
+fn slice_negative_with_len_json() {
+    test_slice(
+        "slice_negative_start_headers_index_len_json",
+        Some(-4),
+        Some(2),
+        &["b", "c"],
+        true,
+        true,
+        true,
+        true,
+    );
+    test_slice(
+        "slice_negative_start_no_headers_index_len_json",
+        Some(-4),
+        Some(2),
+        &["b", "c"],
+        false,
+        true,
+        true,
+        true,
+    );
+    test_slice(
+        "slice_negative_start_headers_no_index_len_json",
+        Some(-4),
+        Some(2),
+        &["b", "c"],
+        true,
+        false,
+        true,
+        true,
     );
 }
 