From f981955f0e6c6bb233020dc01b717112ff15647c Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Tue, 26 Apr 2022 08:02:38 -0700
Subject: [PATCH] Fixed bug in writing csv with buffer resizing (#965)

---
Review note: the guard in front of `ser_writer.field` compared the wrong way
round (`s.len() < local_buf.len() * 3`). That grew `local_buf` for every short
string and left it untouched for long ones, which then hit the new
`_ => unreachable!()` arm because `WriteResult::OutputFull` is no longer
handled. The comparison is corrected below so the buffer is grown exactly when
it is smaller than three times the input length. Generic arguments that had
been stripped from this patch text (`Vec<u8>`, `Utf8Array::<i32>`,
`Arc<dyn Array>`) are restored.

 src/io/csv/write/serialize.rs | 18 ++++++++----------
 tests/it/io/csv/write.rs      | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/io/csv/write/serialize.rs b/src/io/csv/write/serialize.rs
index f26dbd00708..6787f4b9d50 100644
--- a/src/io/csv/write/serialize.rs
+++ b/src/io/csv/write/serialize.rs
@@ -223,8 +223,7 @@ fn new_utf8_serializer<'a, O: Offset>(
         .delimiter(options.delimiter)
         .build();
 
-    let resize = |local_buf: &mut Vec<u8>| {
-        let additional = local_buf.len();
+    let resize = |local_buf: &mut Vec<u8>, additional: usize| {
         local_buf.extend(std::iter::repeat(0u8).take(additional))
     };
 
@@ -236,16 +235,15 @@ fn new_utf8_serializer<'a, O: Offset>(
                 // This will ensure a csv parser will not read them as missing
                 // in a delimited field
                 Some("") => buf.extend_from_slice(b"\"\""),
-                Some(s) => loop {
-                    // first write field
+                Some(s) => {
+                    if local_buf.len() < s.len() * 3 {
+                        resize(&mut local_buf, s.len() * 3)
+                    }
                     match ser_writer.field(s.as_bytes(), &mut local_buf) {
-                        (WriteResult::OutputFull, _, _) => resize(&mut local_buf),
-                        // then on success write delimiter
-                        // we need to make this call because we might need to end with quotes
                         (WriteResult::InputEmpty, _, n_out) => {
                             // the writer::delimiter call writes a maximum of 2 bytes
                             if local_buf.len() - n_out < 2 {
-                                resize(&mut local_buf);
+                                resize(&mut local_buf, 2);
                             }
                             match ser_writer.delimiter(&mut local_buf[n_out..]) {
                                 (WriteResult::InputEmpty, n_out_delimiter) => {
@@ -256,10 +254,10 @@ fn new_utf8_serializer<'a, O: Offset>(
                                 }
                                 _ => unreachable!(),
                             }
-                            break;
                         }
+                        _ => unreachable!(),
                     }
-                },
+                }
                 _ => {}
             }
         },
diff --git a/tests/it/io/csv/write.rs b/tests/it/io/csv/write.rs
index 94ccfea7ff5..d2107bec6b9 100644
--- a/tests/it/io/csv/write.rs
+++ b/tests/it/io/csv/write.rs
@@ -346,3 +346,22 @@ fn write_escaping() {
 
     assert_eq!(csv, "\"Acme co., Ltd.\"\n");
 }
+
+#[test]
+fn write_escaping_resize_local_buf() {
+    // tests if local buffer reallocates properly
+    let a = Utf8Array::<i32>::from_slice(&[
+        "bar,123456789012345678901234567890123456789012345678901234567890",
+    ]);
+    let columns = Chunk::new(vec![Arc::new(a) as Arc<dyn Array>]);
+
+    let mut writer = vec![];
+    let options = SerializeOptions::default();
+    write_chunk(&mut writer, &columns, &options).unwrap();
+    let csv = std::str::from_utf8(&writer).unwrap();
+
+    assert_eq!(
+        csv,
+        "\"bar,123456789012345678901234567890123456789012345678901234567890\"\n"
+    );
+}