Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance of writing CSV (#1128)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jun 29, 2022
1 parent 81ab424 commit 9c6b74a
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
11 changes: 5 additions & 6 deletions src/io/csv/write/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ fn new_serializers<'a, A: AsRef<dyn Array>>(
/// The vector is guaranteed to have `columns.len()` entries.
/// Each `row` is guaranteed to have `columns.array().len()` fields.
pub fn serialize<A: AsRef<dyn Array>>(
columns: &Chunk<A>,
chunk: &Chunk<A>,
options: &SerializeOptions,
) -> Result<Vec<Vec<u8>>> {
let mut serializers = new_serializers(columns, options)?;
let mut serializers = new_serializers(chunk, options)?;

let mut rows = Vec::with_capacity(columns.len());
let mut rows = Vec::with_capacity(chunk.len());
let mut row = vec![];

// this is where the (expensive) transposition happens: the outer loop is on rows, the inner on columns
(0..columns.len()).try_for_each(|_| {
(0..chunk.len()).try_for_each(|_| {
serializers
.iter_mut()
// `unwrap` is infallible because `array.len()` equals `Chunk::len`
Expand All @@ -49,8 +49,7 @@ pub fn serialize<A: AsRef<dyn Array>>(
// replace last delimiter with new line
let last_byte = row.len() - 1;
row[last_byte] = b'\n';
rows.push(row.clone());
row.clear();
rows.push(std::mem::take(&mut row));
}
Result::Ok(())
})?;
Expand Down
20 changes: 20 additions & 0 deletions tests/it/io/csv/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,23 @@ fn write_escaping_resize_local_buf() {
test_generic(chunk, &format!("\"{}\"\n", payload));
}
}

/// Serializes the fixture returned by `data()` with default options and checks that the
/// result is exactly three newline-terminated CSV rows, one byte buffer per row.
#[test]
fn serialize_vec() -> Result<()> {
    let chunk = data();
    let options = SerializeOptions::default();

    let rows = serialize(&chunk, &options)?;

    // Every expected entry is a complete CSV line, including its trailing `\n`.
    let expected = vec![
        b"a b,123.564532,3,true,,00:20:34,d\n".to_vec(),
        b"c,,2,false,2019-04-18 10:54:47.378,06:51:20,a b\n".to_vec(),
        b"d,-556132.25,1,,2019-04-18 02:45:55.555,23:46:03,c\n".to_vec(),
    ];

    assert_eq!(expected, rows);
    Ok(())
}

0 comments on commit 9c6b74a

Please sign in to comment.