Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Validity taken into account when writing StructArray to json (#511)
Browse files Browse the repository at this point in the history
  • Loading branch information
VasanthakumarV committed Oct 10, 2021
1 parent 14b1254 commit eead22f
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 8 deletions.
16 changes: 12 additions & 4 deletions src/io/json/write/serialize.rs
Expand Up @@ -18,6 +18,7 @@
use serde_json::map::Map;
use serde_json::{Number, Value};

use crate::bitmap::utils::zip_validity;
use crate::{array::*, datatypes::*, record_batch::RecordBatch, types::NativeType};

trait JsonSerializable {
Expand Down Expand Up @@ -117,7 +118,6 @@ fn struct_array_to_jsonmap_array(array: &StructArray, row_count: usize) -> Vec<M
.take(row_count)
.collect::<Vec<Map<String, Value>>>();

// todo: use validity...
array
.values()
.iter()
Expand Down Expand Up @@ -202,7 +202,9 @@ fn write_array(array: &dyn Array) -> Value {
array.as_any().downcast_ref::<StructArray>().unwrap(),
array.len(),
);
jsonmaps.into_iter().map(Value::Object).collect()
zip_validity(jsonmaps.into_iter(), array.validity().map(|v| v.iter()))
.map(|m| m.map(Value::Object).unwrap_or(Value::Null))
.collect()
}
_ => {
panic!(
Expand Down Expand Up @@ -295,9 +297,15 @@ fn set_column_for_json_rows(
let inner_objs = struct_array_to_jsonmap_array(array, row_count);
rows.iter_mut()
.take(row_count)
.zip(inner_objs.into_iter())
.zip(zip_validity(
inner_objs.into_iter(),
array.validity().map(|v| v.iter()),
))
.for_each(|(row, obj)| {
row.insert(col_name.to_string(), Value::Object(obj));
row.insert(
col_name.to_string(),
obj.map(Value::Object).unwrap_or(Value::Null),
);
});
}
DataType::List(_) => {
Expand Down
57 changes: 53 additions & 4 deletions tests/it/io/json/write.rs
Expand Up @@ -38,6 +38,55 @@ fn write_simple_rows() {
);
}

#[test]
fn write_nested_struct_with_validity() {
let inner = vec![
Field::new("c121", DataType::Utf8, false),
Field::new("c122", DataType::Int32, false),
];
let fields = vec![
Field::new("c11", DataType::Int32, false),
Field::new("c12", DataType::Struct(inner.clone()), false),
];
let schema = Schema::new(vec![
Field::new("c1", DataType::Struct(fields.clone()), false),
Field::new("c2", DataType::Utf8, false),
]);

let c1 = StructArray::from_data(
DataType::Struct(fields),
vec![
Arc::new(Int32Array::from(&[Some(1), None, Some(5)])),
Arc::new(StructArray::from_data(
DataType::Struct(inner),
vec![
Arc::new(Utf8Array::<i32>::from(&vec![None, Some("f"), Some("g")])),
Arc::new(Int32Array::from(&[Some(20), None, Some(43)])),
],
Some(Bitmap::from([false, true, true])),
)),
],
Some(Bitmap::from([true, true, false])),
);
let c2 = Utf8Array::<i32>::from(&vec![Some("a"), Some("b"), Some("c")]);

let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap();

let mut buf = Vec::new();
{
let mut writer = LineDelimitedWriter::new(&mut buf);
writer.write_batches(&[batch]).unwrap();
}

assert_eq!(
String::from_utf8(buf).unwrap(),
r#"{"c1":{"c11":1,"c12":null},"c2":"a"}
{"c1":{"c11":null,"c12":{"c121":"f","c122":null}},"c2":"b"}
{"c1":null,"c2":"c"}
"#
);
}

#[test]
fn write_nested_structs() {
let c121 = Field::new("c121", DataType::Utf8, false);
Expand Down Expand Up @@ -202,10 +251,10 @@ fn write_list_of_struct() {
Some("f"),
Some("g"),
]))],
None,
Some(Bitmap::from([false, true, true])),
)),
],
None,
Some(Bitmap::from([true, true, false])),
);

// list column rows (c1):
Expand All @@ -231,9 +280,9 @@ fn write_list_of_struct() {

assert_eq!(
String::from_utf8(buf).unwrap(),
r#"{"c1":[{"c11":1,"c12":{"c121":"e"}},{"c11":null,"c12":{"c121":"f"}}],"c2":1}
r#"{"c1":[{"c11":1,"c12":null},{"c11":null,"c12":{"c121":"f"}}],"c2":1}
{"c1":null,"c2":2}
{"c1":[{"c11":5,"c12":{"c121":"g"}}],"c2":3}
{"c1":[null],"c2":3}
"#
);
}

0 comments on commit eead22f

Please sign in to comment.