From eead22fcff1f7ecad9bf48e83616a6a08f487bd4 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Vijayasekaran Date: Sun, 10 Oct 2021 18:00:52 +0530 Subject: [PATCH] Validity taken into account when writing `StructArray` to json (#511) --- src/io/json/write/serialize.rs | 16 +++++++--- tests/it/io/json/write.rs | 57 +++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs index 708019b6f89..8aa43ce946e 100644 --- a/src/io/json/write/serialize.rs +++ b/src/io/json/write/serialize.rs @@ -18,6 +18,7 @@ use serde_json::map::Map; use serde_json::{Number, Value}; +use crate::bitmap::utils::zip_validity; use crate::{array::*, datatypes::*, record_batch::RecordBatch, types::NativeType}; trait JsonSerializable { @@ -117,7 +118,6 @@ fn struct_array_to_jsonmap_array(array: &StructArray, row_count: usize) -> Vec>>(); - // todo: use validity... array .values() .iter() @@ -202,7 +202,9 @@ fn write_array(array: &dyn Array) -> Value { array.as_any().downcast_ref::().unwrap(), array.len(), ); - jsonmaps.into_iter().map(Value::Object).collect() + zip_validity(jsonmaps.into_iter(), array.validity().map(|v| v.iter())) + .map(|m| m.map(Value::Object).unwrap_or(Value::Null)) + .collect() } _ => { panic!( @@ -295,9 +297,15 @@ fn set_column_for_json_rows( let inner_objs = struct_array_to_jsonmap_array(array, row_count); rows.iter_mut() .take(row_count) - .zip(inner_objs.into_iter()) + .zip(zip_validity( + inner_objs.into_iter(), + array.validity().map(|v| v.iter()), + )) .for_each(|(row, obj)| { - row.insert(col_name.to_string(), Value::Object(obj)); + row.insert( + col_name.to_string(), + obj.map(Value::Object).unwrap_or(Value::Null), + ); }); } DataType::List(_) => { diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs index f2131778503..3d514f333cd 100644 --- a/tests/it/io/json/write.rs +++ b/tests/it/io/json/write.rs @@ -38,6 +38,55 @@ fn write_simple_rows() { ); } +#[test] +fn write_nested_struct_with_validity() { + let inner = vec![ + Field::new("c121", DataType::Utf8, false), + Field::new("c122", DataType::Int32, false), + ]; + let fields = vec![ + Field::new("c11", DataType::Int32, false), + Field::new("c12", DataType::Struct(inner.clone()), false), + ]; + let schema = Schema::new(vec![ + Field::new("c1", DataType::Struct(fields.clone()), false), + Field::new("c2", DataType::Utf8, false), + ]); + + let c1 = StructArray::from_data( + DataType::Struct(fields), + vec![ + Arc::new(Int32Array::from(&[Some(1), None, Some(5)])), + Arc::new(StructArray::from_data( + DataType::Struct(inner), + vec![ + Arc::new(Utf8Array::::from(&vec![None, Some("f"), Some("g")])), + Arc::new(Int32Array::from(&[Some(20), None, Some(43)])), + ], + Some(Bitmap::from([false, true, true])), + )), + ], + Some(Bitmap::from([true, true, false])), + ); + let c2 = Utf8Array::::from(&vec![Some("a"), Some("b"), Some("c")]); + + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); + + let mut buf = Vec::new(); + { + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write_batches(&[batch]).unwrap(); + } + + assert_eq!( + String::from_utf8(buf).unwrap(), + r#"{"c1":{"c11":1,"c12":null},"c2":"a"} +{"c1":{"c11":null,"c12":{"c121":"f","c122":null}},"c2":"b"} +{"c1":null,"c2":"c"} +"# + ); +} + #[test] fn write_nested_structs() { let c121 = Field::new("c121", DataType::Utf8, false); @@ -202,10 +251,10 @@ fn write_list_of_struct() { Some("f"), Some("g"), ]))], - None, + Some(Bitmap::from([false, true, true])), )), ], - None, + Some(Bitmap::from([true, true, false])), ); // list column rows (c1): @@ -231,9 +280,9 @@ fn write_list_of_struct() { assert_eq!( String::from_utf8(buf).unwrap(), - r#"{"c1":[{"c11":1,"c12":{"c121":"e"}},{"c11":null,"c12":{"c121":"f"}}],"c2":1} + r#"{"c1":[{"c11":1,"c12":null},{"c11":null,"c12":{"c121":"f"}}],"c2":1} {"c1":null,"c2":2} -{"c1":[{"c11":5,"c12":{"c121":"g"}}],"c2":3} +{"c1":[null],"c2":3} "# ); }