Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved json tests #742

Merged
merged 1 commit into from Jan 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
111 changes: 92 additions & 19 deletions tests/it/io/json/mod.rs
Expand Up @@ -5,6 +5,8 @@ use std::io::Cursor;
use std::sync::Arc;

use arrow2::array::*;
use arrow2::bitmap::Bitmap;
use arrow2::buffer::Buffer;
use arrow2::chunk::Chunk;
use arrow2::datatypes::*;
use arrow2::error::Result;
Expand Down Expand Up @@ -65,14 +67,14 @@ fn round_trip_list() -> Result<()> {
round_trip(data)
}

fn case_list() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_list() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":"4"}
{"a":-10, "b":null, "c":[true, true]}
{"a":null, "b":[2.1, null, -6.2], "c":[false, null], "d":"text"}
"#
.to_string();

let schema = Schema::from(vec![
let fields = vec![
Field::new("a", DataType::Int64, true),
Field::new(
"b",
Expand All @@ -85,9 +87,9 @@ fn case_list() -> (String, Schema, Vec<Box<dyn Array>>) {
true,
),
Field::new("d", DataType::Utf8, true),
]);
let a = Int64Array::from(&[Some(1), Some(-10), None]);
];

let a = Int64Array::from(&[Some(1), Some(-10), None]);
let mut b = MutableListArray::<i32, MutablePrimitiveArray<f64>>::new();
b.try_extend(vec![
Some(vec![Some(2.0), Some(1.3), Some(-6.1)]),
Expand Down Expand Up @@ -115,10 +117,10 @@ fn case_list() -> (String, Schema, Vec<Box<dyn Array>>) {
Box::new(d),
];

(data, schema, columns)
(data, fields, columns)
}

fn case_dict() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_dict() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"machine": "a", "events": [null, "Elect Leader", "Do Ballot"]}
{"machine": "b", "events": ["Do Ballot", null, "Send Data", "Elect Leader"]}
{"machine": "c", "events": ["Send Data"]}
Expand All @@ -133,7 +135,7 @@ fn case_dict() -> (String, Schema, Vec<Box<dyn Array>>) {
true,
)));

let schema = Schema::from(vec![Field::new("events", data_type, true)]);
let fields = vec![Field::new("events", data_type, true)];

type A = MutableDictionaryArray<u64, MutableUtf8Array<i32>>;

Expand All @@ -155,41 +157,41 @@ fn case_dict() -> (String, Schema, Vec<Box<dyn Array>>) {

let array: ListArray<i32> = array.into();

(data, schema, vec![Box::new(array) as Box<dyn Array>])
(data, fields, vec![Box::new(array) as Box<dyn Array>])
}

fn case_basics() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_basics() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"a":1, "b":2.0, "c":false, "d":"4"}
{"a":-10, "b":-3.5, "c":true, "d":null}
{"a":100000000, "b":0.6, "d":"text"}"#
.to_string();
let schema = Schema::from(vec![
let fields = vec![
Field::new("a", DataType::Int64, true),
Field::new("b", DataType::Float64, true),
Field::new("c", DataType::Boolean, true),
Field::new("d", DataType::Utf8, true),
]);
];
let columns = vec![
Box::new(Int64Array::from_slice(&[1, -10, 100000000])) as Box<dyn Array>,
Box::new(Float64Array::from_slice(&[2.0, -3.5, 0.6])),
Box::new(BooleanArray::from(&[Some(false), Some(true), None])),
Box::new(Utf8Array::<i32>::from(&[Some("4"), None, Some("text")])),
];
(data, schema, columns)
(data, fields, columns)
}

fn case_basics_schema() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_projection() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"a":1, "b":2.0, "c":false, "d":"4", "e":"4"}
{"a":10, "b":-3.5, "c":true, "d":null, "e":"text"}
{"a":100000000, "b":0.6, "d":"text"}"#
.to_string();
let schema = Schema::from(vec![
let fields = vec![
Field::new("a", DataType::UInt32, true),
Field::new("b", DataType::Float32, true),
Field::new("c", DataType::Boolean, true),
// note how "d" is not here
Field::new("e", DataType::Binary, true),
]);
];
let columns = vec![
Box::new(UInt32Array::from_slice(&[1, 10, 100000000])) as Box<dyn Array>,
Box::new(Float32Array::from_slice(&[2.0, -3.5, 0.6])),
Expand All @@ -200,10 +202,10 @@ fn case_basics_schema() -> (String, Schema, Vec<Box<dyn Array>>) {
None,
])),
];
(data, schema, columns)
(data, fields, columns)
}

fn case_struct() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_struct() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"a": {"b": true, "c": {"d": "text"}}}
{"a": {"b": false, "c": null}}
{"a": {"b": true, "c": {"d": "text"}}}
Expand All @@ -220,7 +222,7 @@ fn case_struct() -> (String, Schema, Vec<Box<dyn Array>>) {
]),
true,
);
let schema = Schema::from(vec![a_field]);
let fields = vec![a_field];

// build expected output
let d = Utf8Array::<i32>::from(&vec![Some("text"), None, Some("text"), None]);
Expand All @@ -233,5 +235,76 @@ fn case_struct() -> (String, Schema, Vec<Box<dyn Array>>) {
None,
);

(data, schema, vec![Box::new(expected) as Box<dyn Array>])
(data, fields, vec![Box::new(expected) as Box<dyn Array>])
}

/// Fixture for a nullable list column `a` whose items are structs holding a
/// boolean `b` and a nested struct `c` with a single utf8 field `d`.
///
/// Returns the newline-delimited JSON input, the fields to read it with, and
/// the single expected column.
fn case_nested_list() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
    // Data types, built from the innermost struct outwards so that the field
    // declarations and the expected arrays share the exact same types.
    let inner_struct_type = DataType::Struct(vec![Field::new("d", DataType::Utf8, true)]);
    let item_type = DataType::Struct(vec![
        Field::new("b", DataType::Boolean, true),
        Field::new("c", inner_struct_type.clone(), true),
    ]);
    let list_type = DataType::List(Box::new(Field::new("a", item_type.clone(), true)));
    let fields = vec![Field::new("a", list_type.clone(), true)];

    let data = String::from(
        r#"
{"a": [{"b": true, "c": {"d": "a_text"}}, {"b": false, "c": {"d": "b_text"}}]}
{"a": [{"b": false, "c": null}]}
{"a": [{"b": true, "c": {"d": "c_text"}}, {"b": null, "c": {"d": "d_text"}}, {"b": true, "c": {"d": null}}]}
{"a": null}
{"a": []}
"#,
    );

    // Expected child values: one entry per struct item across all rows (2 + 1 + 3).
    let texts = Utf8Array::<i32>::from(&[
        Some("a_text"),
        Some("b_text"),
        None,
        Some("c_text"),
        Some("d_text"),
        None,
    ]);
    let flags = BooleanArray::from(vec![
        Some(true),
        Some(false),
        Some(false),
        Some(true),
        None,
        Some(true),
    ]);

    let nested = StructArray::from_data(
        inner_struct_type,
        vec![Arc::new(texts) as Arc<dyn Array>],
        None,
    );
    let items = StructArray::from_data(
        item_type,
        vec![
            Arc::new(flags) as Arc<dyn Array>,
            Arc::new(nested) as Arc<dyn Array>,
        ],
        None,
    );

    // Offsets delimit the items of each of the five rows (2, 1, 3, 0 and 0 items).
    let offsets = Buffer::from_slice([0i32, 2, 3, 6, 6, 6]);
    // Bit i (least-significant first) flags row i as valid: only the fourth
    // row (`{"a": null}`) is null.
    let validity = Bitmap::from_u8_slice([0b00010111], 5);

    let expected = ListArray::from_data(
        list_type,
        offsets,
        Arc::new(items) as Arc<dyn Array>,
        Some(validity),
    );

    (data, fields, vec![Box::new(expected) as Box<dyn Array>])
}

/// Looks up a shared JSON test fixture by name.
///
/// Returns the tuple produced by the matching `case_*` function: the JSON
/// input, the fields to read it with, and the expected columns.
///
/// # Panics
///
/// Panics if `case` is not one of the names matched below.
fn case(case: &str) -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
    match case {
        "basics" => case_basics(),
        "projection" => case_projection(),
        "list" => case_list(),
        "dict" => case_dict(),
        "struct" => case_struct(),
        "nested_list" => case_nested_list(),
        // A mistyped fixture name is a bug in the calling test; report the
        // offending name rather than a generic "not yet implemented".
        other => panic!("unknown JSON test case: {:?}", other),
    }
}
129 changes: 25 additions & 104 deletions tests/it/io/json/read.rs
@@ -1,17 +1,16 @@
use std::{io::Cursor, sync::Arc};
use std::io::Cursor;

use arrow2::array::*;
use arrow2::datatypes::*;
use arrow2::error::Result;
use arrow2::io::json::read;
use arrow2::{bitmap::Bitmap, buffer::Buffer, error::Result};

use super::*;

#[test]
fn basic() -> Result<()> {
let (data, schema, columns) = case_basics();
fn test_case(case_: &str) -> Result<()> {
let (data, fields, columns) = case(case_);

let batch = read_batch(data, &schema.fields)?;
let batch = read_batch(data, &fields)?;

columns
.iter()
Expand All @@ -21,29 +20,33 @@ fn basic() -> Result<()> {
}

#[test]
fn basic_projection() -> Result<()> {
let (data, schema, columns) = case_basics_schema();
/// Runs the shared `test_case` driver on the `"basics"` fixture.
fn basic() -> Result<()> {
test_case("basics")
}

let batch = read_batch(data, &schema.fields)?;
/// Runs the shared `test_case` driver on the `"projection"` fixture.
#[test]
fn projection() -> Result<()> {
test_case("projection")
}

columns
.iter()
.zip(batch.columns())
.for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref()));
Ok(())
/// Runs the shared `test_case` driver on the `"dict"` fixture.
#[test]
fn dictionary() -> Result<()> {
test_case("dict")
}

#[test]
fn lists() -> Result<()> {
let (data, schema, columns) = case_list();
/// Runs the shared `test_case` driver on the `"list"` fixture.
fn list() -> Result<()> {
test_case("list")
}

let batch = read_batch(data, &schema.fields)?;
/// Runs the shared `test_case` driver on the `"struct"` fixture.
#[test]
fn nested_struct() -> Result<()> {
test_case("struct")
}

columns
.iter()
.zip(batch.columns())
.for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref()));
Ok(())
/// Runs the shared `test_case` driver on the `"nested_list"` fixture.
#[test]
fn nested_list() -> Result<()> {
test_case("nested_list")
}

#[test]
Expand Down Expand Up @@ -88,78 +91,6 @@ fn invalid_read_record() -> Result<()> {
Ok(())
}

#[test]
fn nested_struct_arrays() -> Result<()> {
let (data, schema, columns) = case_struct();

let batch = read_batch(data, &schema.fields)?;

columns
.iter()
.zip(batch.columns())
.for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref()));
Ok(())
}

#[test]
fn nested_list_arrays() -> Result<()> {
let d_field = Field::new("d", DataType::Utf8, true);
let c_field = Field::new("c", DataType::Struct(vec![d_field.clone()]), true);
let b_field = Field::new("b", DataType::Boolean, true);
let a_struct_field = Field::new(
"a",
DataType::Struct(vec![b_field.clone(), c_field.clone()]),
true,
);
let a_list_data_type = DataType::List(Box::new(a_struct_field));
let a_field = Field::new("a", a_list_data_type.clone(), true);

let data = r#"
{"a": [{"b": true, "c": {"d": "a_text"}}, {"b": false, "c": {"d": "b_text"}}]}
{"a": [{"b": false, "c": null}]}
{"a": [{"b": true, "c": {"d": "c_text"}}, {"b": null, "c": {"d": "d_text"}}, {"b": true, "c": {"d": null}}]}
{"a": null}
{"a": []}
"#;

let batch = read_batch(data.to_string(), &[a_field])?;

// build expected output
let d = Utf8Array::<i32>::from(&vec![
Some("a_text"),
Some("b_text"),
None,
Some("c_text"),
Some("d_text"),
None,
]);

let c = StructArray::from_data(DataType::Struct(vec![d_field]), vec![Arc::new(d)], None);

let b = BooleanArray::from(vec![
Some(true),
Some(false),
Some(false),
Some(true),
None,
Some(true),
]);
let a_struct = StructArray::from_data(
DataType::Struct(vec![b_field, c_field]),
vec![Arc::new(b) as Arc<dyn Array>, Arc::new(c) as Arc<dyn Array>],
None,
);
let expected = ListArray::from_data(
a_list_data_type,
Buffer::from_slice([0i32, 2, 3, 6, 6, 6]),
Arc::new(a_struct) as Arc<dyn Array>,
Some(Bitmap::from_u8_slice([0b00010111], 5)),
);

assert_eq!(expected, batch.columns()[0].as_ref());
Ok(())
}

#[test]
fn skip_empty_lines() {
let data = "
Expand Down Expand Up @@ -188,16 +119,6 @@ fn row_type_validation() {
);
}

#[test]
fn list_of_string_dictionary_from_with_nulls() -> Result<()> {
let (data, schema, columns) = case_dict();

let batch = read_batch(data, &schema.fields)?;

assert_eq!(columns[0].as_ref(), batch.columns()[0].as_ref());
Ok(())
}

#[test]
fn infer_schema_mixed_list() -> Result<()> {
let data = r#"{"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":4.1}
Expand Down