From 525d52159fd98980af6768be6d293a85c131743a Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Sat, 5 Feb 2022 13:30:15 +0100
Subject: [PATCH] fix unescaped '"' in json writing

---
 src/io/json/write/serialize.rs |  4 ++--
 tests/it/io/json/write.rs      | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs
index 28bf0bd7563..80c31991c4f 100644
--- a/src/io/json/write/serialize.rs
+++ b/src/io/json/write/serialize.rs
@@ -139,7 +139,7 @@ fn list_serializer<'a, O: Offset>(
 
 #[inline]
 fn utf8_serialize(value: &str, buf: &mut Vec<u8>) {
-    if value.as_bytes().is_ascii() {
+    if value.as_bytes().is_ascii() && !value.contains('"') {
         buf.reserve(value.len() + 2);
         buf.push(b'"');
         buf.extend_from_slice(value.as_bytes());
@@ -147,7 +147,7 @@ fn utf8_serialize(value: &str, buf: &mut Vec<u8>) {
     } else {
         // it may contain reserved keywords: perform roundtrip for
         // todo: avoid this roundtrip over serde_json
-        serde_json::to_writer(buf, &Value::String(value.to_string())).unwrap();
+        serde_json::to_writer(buf, value).unwrap();
     }
 }
 
diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs
index 51997c1d367..fa61459944c 100644
--- a/tests/it/io/json/write.rs
+++ b/tests/it/io/json/write.rs
@@ -305,3 +305,22 @@ fn write_escaped_utf8() -> Result<()> {
     );
     Ok(())
 }
+
+#[test]
+fn write_quotation_marks_in_utf8() -> Result<()> {
+    let a = Utf8Array::<i32>::from(&vec![Some("a\"a"), None]);
+
+    let batch = Chunk::try_new(vec![&a as &dyn Array]).unwrap();
+
+    let buf = write_batch(
+        batch,
+        vec!["c1".to_string()],
+        json_write::LineDelimited::default(),
+    )?;
+
+    assert_eq!(
+        String::from_utf8(buf).unwrap().as_bytes(),
+        b"{\"c1\":\"a\\\"a\"}\n{\"c1\":null}\n"
+    );
+    Ok(())
+}