Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved json tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jan 8, 2022
1 parent 299df30 commit cc97180
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 123 deletions.
111 changes: 92 additions & 19 deletions tests/it/io/json/mod.rs
Expand Up @@ -5,6 +5,8 @@ use std::io::Cursor;
use std::sync::Arc;

use arrow2::array::*;
use arrow2::bitmap::Bitmap;
use arrow2::buffer::Buffer;
use arrow2::chunk::Chunk;
use arrow2::datatypes::*;
use arrow2::error::Result;
Expand Down Expand Up @@ -65,14 +67,14 @@ fn round_trip_list() -> Result<()> {
round_trip(data)
}

fn case_list() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_list() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":"4"}
{"a":-10, "b":null, "c":[true, true]}
{"a":null, "b":[2.1, null, -6.2], "c":[false, null], "d":"text"}
"#
.to_string();

let schema = Schema::from(vec![
let fields = vec![
Field::new("a", DataType::Int64, true),
Field::new(
"b",
Expand All @@ -85,9 +87,9 @@ fn case_list() -> (String, Schema, Vec<Box<dyn Array>>) {
true,
),
Field::new("d", DataType::Utf8, true),
]);
let a = Int64Array::from(&[Some(1), Some(-10), None]);
];

let a = Int64Array::from(&[Some(1), Some(-10), None]);
let mut b = MutableListArray::<i32, MutablePrimitiveArray<f64>>::new();
b.try_extend(vec![
Some(vec![Some(2.0), Some(1.3), Some(-6.1)]),
Expand Down Expand Up @@ -115,10 +117,10 @@ fn case_list() -> (String, Schema, Vec<Box<dyn Array>>) {
Box::new(d),
];

(data, schema, columns)
(data, fields, columns)
}

fn case_dict() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_dict() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"machine": "a", "events": [null, "Elect Leader", "Do Ballot"]}
{"machine": "b", "events": ["Do Ballot", null, "Send Data", "Elect Leader"]}
{"machine": "c", "events": ["Send Data"]}
Expand All @@ -133,7 +135,7 @@ fn case_dict() -> (String, Schema, Vec<Box<dyn Array>>) {
true,
)));

let schema = Schema::from(vec![Field::new("events", data_type, true)]);
let fields = vec![Field::new("events", data_type, true)];

type A = MutableDictionaryArray<u64, MutableUtf8Array<i32>>;

Expand All @@ -155,41 +157,41 @@ fn case_dict() -> (String, Schema, Vec<Box<dyn Array>>) {

let array: ListArray<i32> = array.into();

(data, schema, vec![Box::new(array) as Box<dyn Array>])
(data, fields, vec![Box::new(array) as Box<dyn Array>])
}

fn case_basics() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_basics() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"a":1, "b":2.0, "c":false, "d":"4"}
{"a":-10, "b":-3.5, "c":true, "d":null}
{"a":100000000, "b":0.6, "d":"text"}"#
.to_string();
let schema = Schema::from(vec![
let fields = vec![
Field::new("a", DataType::Int64, true),
Field::new("b", DataType::Float64, true),
Field::new("c", DataType::Boolean, true),
Field::new("d", DataType::Utf8, true),
]);
];
let columns = vec![
Box::new(Int64Array::from_slice(&[1, -10, 100000000])) as Box<dyn Array>,
Box::new(Float64Array::from_slice(&[2.0, -3.5, 0.6])),
Box::new(BooleanArray::from(&[Some(false), Some(true), None])),
Box::new(Utf8Array::<i32>::from(&[Some("4"), None, Some("text")])),
];
(data, schema, columns)
(data, fields, columns)
}

fn case_basics_schema() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_projection() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"a":1, "b":2.0, "c":false, "d":"4", "e":"4"}
{"a":10, "b":-3.5, "c":true, "d":null, "e":"text"}
{"a":100000000, "b":0.6, "d":"text"}"#
.to_string();
let schema = Schema::from(vec![
let fields = vec![
Field::new("a", DataType::UInt32, true),
Field::new("b", DataType::Float32, true),
Field::new("c", DataType::Boolean, true),
// note how "d" is not here
Field::new("e", DataType::Binary, true),
]);
];
let columns = vec![
Box::new(UInt32Array::from_slice(&[1, 10, 100000000])) as Box<dyn Array>,
Box::new(Float32Array::from_slice(&[2.0, -3.5, 0.6])),
Expand All @@ -200,10 +202,10 @@ fn case_basics_schema() -> (String, Schema, Vec<Box<dyn Array>>) {
None,
])),
];
(data, schema, columns)
(data, fields, columns)
}

fn case_struct() -> (String, Schema, Vec<Box<dyn Array>>) {
fn case_struct() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
let data = r#"{"a": {"b": true, "c": {"d": "text"}}}
{"a": {"b": false, "c": null}}
{"a": {"b": true, "c": {"d": "text"}}}
Expand All @@ -220,7 +222,7 @@ fn case_struct() -> (String, Schema, Vec<Box<dyn Array>>) {
]),
true,
);
let schema = Schema::from(vec![a_field]);
let fields = vec![a_field];

// build expected output
let d = Utf8Array::<i32>::from(&vec![Some("text"), None, Some("text"), None]);
Expand All @@ -233,5 +235,76 @@ fn case_struct() -> (String, Schema, Vec<Box<dyn Array>>) {
None,
);

(data, schema, vec![Box::new(expected) as Box<dyn Array>])
(data, fields, vec![Box::new(expected) as Box<dyn Array>])
}

/// Fixture for reading a nullable list of structs that themselves contain a nested struct.
///
/// Returns the newline-delimited JSON input, the single field `a` describing it, and the
/// expected column for that field.
fn case_nested_list() -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
    // Data types, innermost first: a: List<Struct<b: Boolean, c: Struct<d: Utf8>>>.
    // Every field is declared nullable.
    let inner_struct_type = DataType::Struct(vec![Field::new("d", DataType::Utf8, true)]);
    let item_struct_type = DataType::Struct(vec![
        Field::new("b", DataType::Boolean, true),
        Field::new("c", inner_struct_type.clone(), true),
    ]);
    let list_type = DataType::List(Box::new(Field::new("a", item_struct_type.clone(), true)));
    let fields = vec![Field::new("a", list_type.clone(), true)];

    // Five rows: lists of 2, 1 and 3 items, then a null list, then an empty list.
    let json = String::from(
        r#"
{"a": [{"b": true, "c": {"d": "a_text"}}, {"b": false, "c": {"d": "b_text"}}]}
{"a": [{"b": false, "c": null}]}
{"a": [{"b": true, "c": {"d": "c_text"}}, {"b": null, "c": {"d": "d_text"}}, {"b": true, "c": {"d": null}}]}
{"a": null}
{"a": []}
"#,
    );

    // Expected child values, flattened: one entry per list item across all rows (6 in total).
    let d_values = vec![
        Some("a_text"),
        Some("b_text"),
        None,
        Some("c_text"),
        Some("d_text"),
        None,
    ];
    let d_array = Utf8Array::<i32>::from(&d_values);
    let b_array = BooleanArray::from(vec![
        Some(true),
        Some(false),
        Some(false),
        Some(true),
        None,
        Some(true),
    ]);

    // `c` is built without a validity bitmap; the `"c": null` item shows up as a null `d`.
    let c_array = StructArray::from_data(
        inner_struct_type,
        vec![Arc::new(d_array) as Arc<dyn Array>],
        None,
    );
    let item_array = StructArray::from_data(
        item_struct_type,
        vec![
            Arc::new(b_array) as Arc<dyn Array>,
            Arc::new(c_array) as Arc<dyn Array>,
        ],
        None,
    );

    // Row i spans items offsets[i]..offsets[i + 1]; the null and the empty row are both 6..6.
    let offsets = Buffer::from_slice([0i32, 2, 3, 6, 6, 6]);
    // Five validity bits, least-significant first: only row 3 (`{"a": null}`) is unset.
    let validity = Bitmap::from_u8_slice([0b00010111], 5);
    let list_array = ListArray::from_data(
        list_type,
        offsets,
        Arc::new(item_array) as Arc<dyn Array>,
        Some(validity),
    );

    (json, fields, vec![Box::new(list_array) as Box<dyn Array>])
}

/// Looks up a JSON test fixture by name.
///
/// Returns the tuple produced by the matching `case_*` function: the JSON input, the fields
/// to read it with, and the expected columns.
///
/// # Panics
///
/// Panics if `case` is not one of the fixture names listed in the `match` below.
fn case(case: &str) -> (String, Vec<Field>, Vec<Box<dyn Array>>) {
    match case {
        "basics" => case_basics(),
        "projection" => case_projection(),
        "list" => case_list(),
        "dict" => case_dict(),
        "struct" => case_struct(),
        "nested_list" => case_nested_list(),
        // An unrecognized name is a typo in a test, not unfinished work: fail with a message
        // that names the offending case instead of `todo!`'s "not yet implemented".
        unknown => panic!("unknown JSON test case: {:?}", unknown),
    }
}
129 changes: 25 additions & 104 deletions tests/it/io/json/read.rs
@@ -1,17 +1,16 @@
use std::{io::Cursor, sync::Arc};
use std::io::Cursor;

use arrow2::array::*;
use arrow2::datatypes::*;
use arrow2::error::Result;
use arrow2::io::json::read;
use arrow2::{bitmap::Bitmap, buffer::Buffer, error::Result};

use super::*;

#[test]
fn basic() -> Result<()> {
let (data, schema, columns) = case_basics();
fn test_case(case_: &str) -> Result<()> {
let (data, fields, columns) = case(case_);

let batch = read_batch(data, &schema.fields)?;
let batch = read_batch(data, &fields)?;

columns
.iter()
Expand All @@ -21,29 +20,33 @@ fn basic() -> Result<()> {
}

#[test]
fn basic_projection() -> Result<()> {
let (data, schema, columns) = case_basics_schema();
/// Runs the shared `test_case` check against the "basics" fixture.
fn basic() -> Result<()> {
test_case("basics")
}

let batch = read_batch(data, &schema.fields)?;
/// Runs the shared `test_case` check against the "projection" fixture.
#[test]
fn projection() -> Result<()> {
test_case("projection")
}

columns
.iter()
.zip(batch.columns())
.for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref()));
Ok(())
/// Runs the shared `test_case` check against the "dict" fixture.
#[test]
fn dictionary() -> Result<()> {
test_case("dict")
}

#[test]
fn lists() -> Result<()> {
let (data, schema, columns) = case_list();
/// Runs the shared `test_case` check against the "list" fixture.
fn list() -> Result<()> {
test_case("list")
}

let batch = read_batch(data, &schema.fields)?;
/// Runs the shared `test_case` check against the "struct" fixture.
#[test]
fn nested_struct() -> Result<()> {
test_case("struct")
}

columns
.iter()
.zip(batch.columns())
.for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref()));
Ok(())
/// Runs the shared `test_case` check against the "nested_list" fixture.
#[test]
fn nested_list() -> Result<()> {
test_case("nested_list")
}

#[test]
Expand Down Expand Up @@ -88,78 +91,6 @@ fn invalid_read_record() -> Result<()> {
Ok(())
}

#[test]
fn nested_struct_arrays() -> Result<()> {
let (data, schema, columns) = case_struct();

let batch = read_batch(data, &schema.fields)?;

columns
.iter()
.zip(batch.columns())
.for_each(|(expected, result)| assert_eq!(expected.as_ref(), result.as_ref()));
Ok(())
}

#[test]
fn nested_list_arrays() -> Result<()> {
let d_field = Field::new("d", DataType::Utf8, true);
let c_field = Field::new("c", DataType::Struct(vec![d_field.clone()]), true);
let b_field = Field::new("b", DataType::Boolean, true);
let a_struct_field = Field::new(
"a",
DataType::Struct(vec![b_field.clone(), c_field.clone()]),
true,
);
let a_list_data_type = DataType::List(Box::new(a_struct_field));
let a_field = Field::new("a", a_list_data_type.clone(), true);

let data = r#"
{"a": [{"b": true, "c": {"d": "a_text"}}, {"b": false, "c": {"d": "b_text"}}]}
{"a": [{"b": false, "c": null}]}
{"a": [{"b": true, "c": {"d": "c_text"}}, {"b": null, "c": {"d": "d_text"}}, {"b": true, "c": {"d": null}}]}
{"a": null}
{"a": []}
"#;

let batch = read_batch(data.to_string(), &[a_field])?;

// build expected output
let d = Utf8Array::<i32>::from(&vec![
Some("a_text"),
Some("b_text"),
None,
Some("c_text"),
Some("d_text"),
None,
]);

let c = StructArray::from_data(DataType::Struct(vec![d_field]), vec![Arc::new(d)], None);

let b = BooleanArray::from(vec![
Some(true),
Some(false),
Some(false),
Some(true),
None,
Some(true),
]);
let a_struct = StructArray::from_data(
DataType::Struct(vec![b_field, c_field]),
vec![Arc::new(b) as Arc<dyn Array>, Arc::new(c) as Arc<dyn Array>],
None,
);
let expected = ListArray::from_data(
a_list_data_type,
Buffer::from_slice([0i32, 2, 3, 6, 6, 6]),
Arc::new(a_struct) as Arc<dyn Array>,
Some(Bitmap::from_u8_slice([0b00010111], 5)),
);

assert_eq!(expected, batch.columns()[0].as_ref());
Ok(())
}

#[test]
fn skip_empty_lines() {
let data = "
Expand Down Expand Up @@ -188,16 +119,6 @@ fn row_type_validation() {
);
}

#[test]
fn list_of_string_dictionary_from_with_nulls() -> Result<()> {
let (data, schema, columns) = case_dict();

let batch = read_batch(data, &schema.fields)?;

assert_eq!(columns[0].as_ref(), batch.columns()[0].as_ref());
Ok(())
}

#[test]
fn infer_schema_mixed_list() -> Result<()> {
let data = r#"{"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":4.1}
Expand Down

0 comments on commit cc97180

Please sign in to comment.