
Commit

Simpler testing
jorgecarleitao committed May 7, 2022
1 parent fc91dd8 commit 6654ff7
Showing 5 changed files with 52 additions and 41 deletions.
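The diff moves the parquet round-trip integration tests into a dedicated module gated on the io_json_integration feature, and additionally gates compression-dependent tests on io_parquet_compression. As a minimal sketch of the mechanism (illustrative only, not part of the diff; the test name is made up):

    // Compiled only when the crate is built with `--features io_parquet_compression`;
    // without that feature the item does not exist, so a plain `cargo test` skips it.
    #[cfg(feature = "io_parquet_compression")]
    #[test]
    fn compressed_roundtrip() {
        // test body elided
    }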
2 changes: 1 addition & 1 deletion tests/it/io/mod.rs
@@ -7,7 +7,7 @@ mod json;
#[cfg(feature = "io_json")]
mod ndjson;

#[cfg(feature = "io_ipc")]
#[cfg(feature = "io_json_integration")]
mod ipc;

#[cfg(feature = "io_parquet")]
41 changes: 41 additions & 0 deletions tests/it/io/parquet/integration.rs
@@ -0,0 +1,41 @@
use arrow2::error::Result;

use super::{integration_read, integration_write};
use crate::io::ipc::read_gzip_json;

fn test_file(version: &str, file_name: &str) -> Result<()> {
let (schema, _, batches) = read_gzip_json(version, file_name)?;

// empty batches are not written/read from parquet and can be ignored
let batches = batches
.into_iter()
.filter(|x| !x.is_empty())
.collect::<Vec<_>>();

let data = integration_write(&schema, &batches)?;

let (read_schema, read_batches) = integration_read(&data)?;

assert_eq!(schema, read_schema);
assert_eq!(batches, read_batches);

Ok(())
}

#[test]
fn roundtrip_100_primitive() -> Result<()> {
test_file("1.0.0-littleendian", "generated_primitive")?;
test_file("1.0.0-bigendian", "generated_primitive")
}

#[test]
fn roundtrip_100_dict() -> Result<()> {
test_file("1.0.0-littleendian", "generated_dictionary")?;
test_file("1.0.0-bigendian", "generated_dictionary")
}

#[test]
fn roundtrip_100_extension() -> Result<()> {
test_file("1.0.0-littleendian", "generated_extension")?;
test_file("1.0.0-bigendian", "generated_extension")
}
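All three round-trip tests delegate to the test_file helper above; adding a further integration case follows the same two-call pattern. A hypothetical example (the "generated_null" file name is illustrative and not part of this commit):

    #[test]
    fn roundtrip_100_null() -> Result<()> {
        // Round-trip both endianness variants of the same integration file.
        test_file("1.0.0-littleendian", "generated_null")?;
        test_file("1.0.0-bigendian", "generated_null")
    }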
42 changes: 2 additions & 40 deletions tests/it/io/parquet/mod.rs
@@ -6,8 +6,8 @@ use arrow2::{
io::parquet::read::statistics::*, io::parquet::read::*, io::parquet::write::*,
};

use crate::io::ipc::read_gzip_json;

#[cfg(feature = "io_json_integration")]
mod integration;
mod read;
mod read_indexes;
mod write;
@@ -789,7 +789,6 @@ pub fn pyarrow_struct_statistics(column: &str) -> Statistics {
}
}

/// Round-trip with parquet using the same integration files used for IPC integration tests.
fn integration_write(schema: &Schema, batches: &[Chunk<Arc<dyn Array>>]) -> Result<Vec<u8>> {
let options = WriteOptions {
write_statistics: true,
@@ -841,43 +840,6 @@ fn integration_read(data: &[u8]) -> Result<IntegrationRead> {
Ok((schema, batches))
}

fn test_file(version: &str, file_name: &str) -> Result<()> {
let (schema, _, batches) = read_gzip_json(version, file_name)?;

// empty batches are not written/read from parquet and can be ignored
let batches = batches
.into_iter()
.filter(|x| !x.is_empty())
.collect::<Vec<_>>();

let data = integration_write(&schema, &batches)?;

let (read_schema, read_batches) = integration_read(&data)?;

assert_eq!(schema, read_schema);
assert_eq!(batches, read_batches);

Ok(())
}

#[test]
fn roundtrip_100_primitive() -> Result<()> {
test_file("1.0.0-littleendian", "generated_primitive")?;
test_file("1.0.0-bigendian", "generated_primitive")
}

#[test]
fn roundtrip_100_dict() -> Result<()> {
test_file("1.0.0-littleendian", "generated_dictionary")?;
test_file("1.0.0-bigendian", "generated_dictionary")
}

#[test]
fn roundtrip_100_extension() -> Result<()> {
test_file("1.0.0-littleendian", "generated_extension")?;
test_file("1.0.0-bigendian", "generated_extension")
}

/// Tests that when arrow-specific types (Duration and LargeUtf8) are written to parquet, we can roundtrip
/// their logical types.
#[test]
3 changes: 3 additions & 0 deletions tests/it/io/parquet/read.rs
@@ -458,6 +458,7 @@ fn v1_nested_edge_2() -> Result<()> {
test_pyarrow_integration("null", 1, "nested_edge", false, false, None)
}

#[cfg(feature = "io_parquet_compression")]
#[test]
fn all_types() -> Result<()> {
let path = "testing/parquet-testing/data/alltypes_plain.parquet";
@@ -495,6 +496,7 @@ fn all_types() -> Result<()> {
Ok(())
}

#[cfg(feature = "io_parquet_compression")]
#[test]
fn all_types_chunked() -> Result<()> {
// this has one batch with 8 elements
@@ -546,6 +548,7 @@ fn all_types_chunked() -> Result<()> {
Ok(())
}

#[cfg(feature = "io_parquet_compression")]
#[test]
fn invalid_utf8() {
let invalid_data = &[
5 changes: 5 additions & 0 deletions tests/it/io/parquet/write.rs
@@ -97,6 +97,7 @@ fn int64_optional_v2() -> Result<()> {
)
}

#[cfg(feature = "io_parquet_compression")]
#[test]
fn int64_optional_v2_compressed() -> Result<()> {
round_trip(
@@ -157,6 +158,7 @@ fn utf8_required_v2() -> Result<()> {
)
}

#[cfg(feature = "io_parquet_compression")]
#[test]
fn utf8_optional_v2_compressed() -> Result<()> {
round_trip(
@@ -169,6 +171,7 @@ fn utf8_optional_v2_compressed() -> Result<()> {
)
}

#[cfg(feature = "io_parquet_compression")]
#[test]
fn utf8_required_v2_compressed() -> Result<()> {
round_trip(
@@ -229,6 +232,7 @@ fn bool_required_v2_uncompressed() -> Result<()> {
)
}

#[cfg(feature = "io_parquet_compression")]
#[test]
fn bool_required_v2_compressed() -> Result<()> {
round_trip(
@@ -386,6 +390,7 @@ fn i32_optional_v2_dict() -> Result<()> {
)
}

#[cfg(feature = "io_parquet_compression")]
#[test]
fn i32_optional_v2_dict_compressed() -> Result<()> {
round_trip(
