From f1ff3586850b3e7a8569fb977537d1caad743c13 Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Fri, 26 Nov 2021 17:16:16 +0100 Subject: [PATCH] Improved benches. (#636) --- Cargo.toml | 2 +- benches/aggregate.rs | 6 +++--- benches/arithmetic_kernels.rs | 11 ++++------- benches/bitmap.rs | 6 +----- benches/bitmap_ops.rs | 4 ++-- benches/bitwise.rs | 19 +++++++------------ benches/cast_kernels.rs | 32 +++++++++++++++----------------- benches/comparison_kernels.rs | 6 +++--- benches/concatenate.rs | 11 ++++------- benches/count_zeros.rs | 4 ++-- benches/filter_kernels.rs | 18 ++++++++---------- benches/from_trusted_len_iter.rs | 4 +--- benches/growable.rs | 11 ++++------- benches/hash_kernel.rs | 9 +++------ benches/iter_list.rs | 14 +++++++------- benches/iter_utf8.rs | 4 ++-- benches/length_kernel.rs | 4 +--- benches/sort_kernel.rs | 14 ++++++-------- benches/take_kernels.rs | 13 ++++--------- benches/unset_count.rs | 4 ++-- benches/write_csv.rs | 14 ++++---------- benches/write_ipc.rs | 6 +++--- benches/write_parquet.rs | 28 ++++++++++++---------------- src/util/bench_util.rs | 30 +++--------------------------- 24 files changed, 102 insertions(+), 172 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 23e6be30476..74de95023da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -117,7 +117,7 @@ full = [ "regex", "compute", # parses timezones used in timestamp conversions - "chrono-tz" + "chrono-tz", ] io_csv = ["io_csv_read", "io_csv_write"] io_csv_async = ["io_csv_read_async"] diff --git a/benches/aggregate.rs b/benches/aggregate.rs index 3616f5ee9d7..a8b666458fe 100644 --- a/benches/aggregate.rs +++ b/benches/aggregate.rs @@ -1,8 +1,8 @@ use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::array::*; +use arrow2::compute::aggregate::*; use arrow2::util::bench_util::*; -use arrow2::{compute::aggregate::*, datatypes::DataType}; fn bench_sum(arr_a: &PrimitiveArray) { sum(criterion::black_box(arr_a)).unwrap(); @@ -15,7 +15,7 @@ fn bench_min(arr_a: &PrimitiveArray) { fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); - let arr_a = create_primitive_array::(size, DataType::Float32, 0.0); + let arr_a = create_primitive_array::(size, 0.0); c.bench_function(&format!("sum 2^{} f32", log2_size), |b| { b.iter(|| bench_sum(&arr_a)) @@ -24,7 +24,7 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_min(&arr_a)) }); - let arr_a = create_primitive_array::(size, DataType::Float32, 0.1); + let arr_a = create_primitive_array::(size, 0.1); c.bench_function(&format!("sum null 2^{} f32", log2_size), |b| { b.iter(|| bench_sum(&arr_a)) diff --git a/benches/arithmetic_kernels.rs b/benches/arithmetic_kernels.rs index 4d800bd7bec..b0e487341b9 100644 --- a/benches/arithmetic_kernels.rs +++ b/benches/arithmetic_kernels.rs @@ -1,12 +1,9 @@ -#[macro_use] -extern crate criterion; -use criterion::Criterion; +use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::array::*; use arrow2::util::bench_util::*; use arrow2::{ - compute::arithmetics::basic::add, compute::arithmetics::basic::div_scalar, datatypes::DataType, - types::NativeType, + compute::arithmetics::basic::add, compute::arithmetics::basic::div_scalar, types::NativeType, }; use num_traits::NumCast; use std::ops::{Add, Div}; @@ -28,8 +25,8 @@ where fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); - let arr_a = create_primitive_array_with_seed::(size, DataType::UInt64, 0.0, 43); - let arr_b = create_primitive_array_with_seed::(size, DataType::UInt64, 0.0, 42); + let arr_a = create_primitive_array_with_seed::(size, 0.0, 43); + let arr_b = create_primitive_array_with_seed::(size, 0.0, 42); c.bench_function(&format!("divide_scalar 2^{}", log2_size), |b| { // 4 is a very fast optimizable divisor diff --git a/benches/bitmap.rs b/benches/bitmap.rs index 3c7ac0983bb..4f77bb3ac87 100644 --- a/benches/bitmap.rs +++ b/benches/bitmap.rs @@ -1,12 +1,8 @@ -extern crate arrow2; - use std::iter::FromIterator; -use arrow2::bitmap::*; - use criterion::{criterion_group, criterion_main, Criterion}; -// +use arrow2::bitmap::*; fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { diff --git a/benches/bitmap_ops.rs b/benches/bitmap_ops.rs index 8a606fb8d36..f9c667db5e7 100644 --- a/benches/bitmap_ops.rs +++ b/benches/bitmap_ops.rs @@ -1,7 +1,7 @@ -use arrow2::bitmap::Bitmap; - use criterion::{criterion_group, criterion_main, Criterion}; +use arrow2::bitmap::Bitmap; + fn bench_arrow2(lhs: &Bitmap, rhs: &Bitmap) { let r = lhs | rhs; assert!(r.null_count() > 0); diff --git a/benches/bitwise.rs b/benches/bitwise.rs index 7597b15dd7b..4e1fc9f82dd 100644 --- a/benches/bitwise.rs +++ b/benches/bitwise.rs @@ -4,34 +4,29 @@ use criterion::{criterion_group, criterion_main, Criterion}; use num_traits::NumCast; use arrow2::{ - array::PrimitiveArray, - compute::bitwise::*, - datatypes::DataType, - types::NativeType, - util::bench_util::{ - create_boolean_array, create_primitive_array, create_primitive_array_with_seed, - }, + array::PrimitiveArray, compute::bitwise::*, types::NativeType, + util::bench_util::create_primitive_array_with_seed, }; fn bench_or(lhs: &PrimitiveArray, rhs: &PrimitiveArray) where T: NativeType + BitOr + NumCast, { - criterion::black_box(or(lhs, rhs)).unwrap(); + criterion::black_box(or(lhs, rhs)); } fn bench_xor(lhs: &PrimitiveArray, rhs: &PrimitiveArray) where T: NativeType + BitXor + NumCast, { - criterion::black_box(xor(lhs, rhs)).unwrap(); + criterion::black_box(xor(lhs, rhs)); } fn bench_and(lhs: &PrimitiveArray, rhs: &PrimitiveArray) where T: NativeType + BitAnd + NumCast, { - criterion::black_box(and(lhs, rhs)).unwrap(); + criterion::black_box(and(lhs, rhs)); } fn bench_not(arr: &PrimitiveArray) @@ -44,8 +39,8 @@ where fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); - let arr_a = create_primitive_array_with_seed::(size, DataType::UInt64, 0.0, 43); - let arr_b = create_primitive_array_with_seed::(size, DataType::UInt64, 0.0, 42); + let arr_a = create_primitive_array_with_seed::(size, 0.0, 43); + let arr_b = create_primitive_array_with_seed::(size, 0.0, 42); c.bench_function(&format!("or 2^{}", log2_size), |b| { b.iter(|| bench_or(&arr_a, &arr_b)) diff --git a/benches/cast_kernels.rs b/benches/cast_kernels.rs index 0c2925baaf1..b30ef4cc04d 100644 --- a/benches/cast_kernels.rs +++ b/benches/cast_kernels.rs @@ -15,9 +15,7 @@ // specific language governing permissions and limitations // under the License. -#[macro_use] -extern crate criterion; -use criterion::Criterion; +use criterion::{criterion_group, criterion_main, Criterion}; use rand::distributions::Uniform; use rand::Rng; @@ -72,27 +70,27 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> Utf8Array { // cast array from specified primitive array type to desired data type fn cast_array(array: &dyn Array, to_type: DataType) { - criterion::black_box(cast::cast(array, &to_type).unwrap()); + criterion::black_box(cast::cast(array, &to_type, Default::default()).unwrap()); } fn add_benchmark(c: &mut Criterion) { let size = 512; - let i32_array = create_primitive_array::(size, DataType::Int32, 0.1); - let i64_array = create_primitive_array::(size, DataType::Int64, 0.1); - let f32_array = create_primitive_array::(size, DataType::Float32, 0.1); - let f32_utf8_array = cast::cast(&f32_array, &DataType::Utf8).unwrap(); + let i32_array = create_primitive_array::(size, 0.1); + let i64_array = create_primitive_array::(size, 0.1); + let f32_array = create_primitive_array::(size, 0.1); + let f32_utf8_array = cast::cast(&f32_array, &DataType::Utf8, Default::default()).unwrap(); - let f64_array = create_primitive_array::(size, DataType::Float64, 0.1); - let date64_array = create_primitive_array::(size, DataType::Date64, 0.1); - let date32_array = create_primitive_array::(size, DataType::Date32, 0.1); + let f64_array = create_primitive_array::(size, 0.1); + let date64_array = create_primitive_array::(size, 0.1).to(DataType::Date64); + let date32_array = create_primitive_array::(size, 0.1).to(DataType::Date32); let time32s_array = - create_primitive_array::(size, DataType::Time32(TimeUnit::Second), 0.1); + create_primitive_array::(size, 0.1).to(DataType::Time32(TimeUnit::Second)); let time64ns_array = - create_primitive_array::(size, DataType::Time64(TimeUnit::Nanosecond), 0.1); - let time_ns_array = - create_primitive_array::(size, DataType::Timestamp(TimeUnit::Nanosecond, None), 0.1); - let time_ms_array = - create_primitive_array::(size, DataType::Timestamp(TimeUnit::Millisecond, None), 0.1); + create_primitive_array::(size, 0.1).to(DataType::Time64(TimeUnit::Nanosecond)); + let time_ns_array = create_primitive_array::(size, 0.1) + .to(DataType::Timestamp(TimeUnit::Nanosecond, None)); + let time_ms_array = create_primitive_array::(size, 0.1) + .to(DataType::Timestamp(TimeUnit::Millisecond, None)); let utf8_date_array = build_utf8_date_array(512, true); let utf8_date_time_array = build_utf8_date_time_array(512, true); diff --git a/benches/comparison_kernels.rs b/benches/comparison_kernels.rs index 86e48a89676..1371471d2cf 100644 --- a/benches/comparison_kernels.rs +++ b/benches/comparison_kernels.rs @@ -1,15 +1,15 @@ use criterion::{criterion_group, criterion_main, Criterion}; +use arrow2::compute::comparison::{eq, eq_scalar}; use arrow2::scalar::*; use arrow2::util::bench_util::*; -use arrow2::{compute::comparison::eq, datatypes::DataType}; fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); - let arr_a = create_primitive_array_with_seed::(size, DataType::Float32, 0.0, 42); - let arr_b = create_primitive_array_with_seed::(size, DataType::Float32, 0.0, 43); + let arr_a = create_primitive_array_with_seed::(size, 0.0, 42); + let arr_b = create_primitive_array_with_seed::(size, 0.0, 43); c.bench_function(&format!("f32 2^{}", log2_size), |b| { b.iter(|| eq(&arr_a, &arr_b)) diff --git a/benches/concatenate.rs b/benches/concatenate.rs index 7233a15a9d4..7a6148f4553 100644 --- a/benches/concatenate.rs +++ b/benches/concatenate.rs @@ -1,19 +1,16 @@ -extern crate arrow2; +use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::{ compute::concatenate::concatenate, - datatypes::DataType, util::bench_util::{create_boolean_array, create_primitive_array}, }; -use criterion::{criterion_group, criterion_main, Criterion}; - fn add_benchmark(c: &mut Criterion) { (20..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); - let array1 = create_primitive_array::(8, DataType::Int32, 0.5); - let array2 = create_primitive_array::(size + 1, DataType::Int32, 0.5); + let array1 = create_primitive_array::(8, 0.5); + let array2 = create_primitive_array::(size + 1, 0.5); c.bench_function(&format!("int32 concat aligned 2^{}", log2_size), |b| { b.iter(|| { @@ -21,7 +18,7 @@ fn add_benchmark(c: &mut Criterion) { }) }); - let array1 = create_primitive_array::(9, DataType::Int32, 0.5); + let array1 = create_primitive_array::(9, 0.5); c.bench_function(&format!("int32 concat unaligned 2^{}", log2_size), |b| { b.iter(|| { diff --git a/benches/count_zeros.rs b/benches/count_zeros.rs index a435afd7826..069fa40315b 100644 --- a/benches/count_zeros.rs +++ b/benches/count_zeros.rs @@ -1,7 +1,7 @@ -use arrow2::bitmap::utils::count_zeros; - use criterion::{criterion_group, criterion_main, Criterion}; +use arrow2::bitmap::utils::count_zeros; + fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); diff --git a/benches/filter_kernels.rs b/benches/filter_kernels.rs index 424f2884af7..be29dd58ae9 100644 --- a/benches/filter_kernels.rs +++ b/benches/filter_kernels.rs @@ -14,17 +14,15 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -extern crate arrow2; - use std::sync::Arc; +use criterion::{criterion_group, criterion_main, Criterion}; + use arrow2::array::*; use arrow2::compute::filter::{build_filter, filter, filter_record_batch, Filter}; use arrow2::datatypes::{DataType, Field, Schema}; use arrow2::record_batch::RecordBatch; - use arrow2::util::bench_util::{create_boolean_array, create_primitive_array, create_string_array}; -use criterion::{criterion_group, criterion_main, Criterion}; fn bench_filter(data_array: &dyn Array, filter_array: &BooleanArray) { criterion::black_box(filter(data_array, filter_array).unwrap()); @@ -43,12 +41,12 @@ fn add_benchmark(c: &mut Criterion) { let filter_array = BooleanArray::from_data(DataType::Boolean, filter_array.values().clone(), None); - let arr_a = create_primitive_array::(size, DataType::Float32, 0.0); + let arr_a = create_primitive_array::(size, 0.0); c.bench_function(&format!("filter 2^{} f32", log2_size), |b| { b.iter(|| bench_filter(&arr_a, &filter_array)) }); - let arr_a = create_primitive_array::(size, DataType::Float32, 0.1); + let arr_a = create_primitive_array::(size, 0.1); c.bench_function(&format!("filter null 2^{} f32", log2_size), |b| { b.iter(|| bench_filter(&arr_a, &filter_array)) @@ -64,7 +62,7 @@ fn add_benchmark(c: &mut Criterion) { let dense_filter = build_filter(&dense_filter_array).unwrap(); let sparse_filter = build_filter(&sparse_filter_array).unwrap(); - let data_array = create_primitive_array::(size, DataType::UInt8, 0.0); + let data_array = create_primitive_array::(size, 0.0); c.bench_function("filter u8", |b| { b.iter(|| bench_filter(&data_array, &filter_array)) @@ -86,7 +84,7 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_built_filter(&sparse_filter, &data_array)) }); - let data_array = create_primitive_array::(size, DataType::UInt8, 0.5); + let data_array = create_primitive_array::(size, 0.5); c.bench_function("filter context u8 w NULLs", |b| { b.iter(|| bench_built_filter(&filter, &data_array)) }); @@ -97,7 +95,7 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_built_filter(&sparse_filter, &data_array)) }); - let data_array = create_primitive_array::(size, DataType::Float32, 0.5); + let data_array = create_primitive_array::(size, 0.5); c.bench_function("filter f32", |b| { b.iter(|| bench_filter(&data_array, &filter_array)) }); @@ -125,7 +123,7 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_built_filter(&sparse_filter, &data_array)) }); - let data_array = create_primitive_array::(size, DataType::Float32, 0.0); + let data_array = create_primitive_array::(size, 0.0); let field = Field::new("c1", data_array.data_type().clone(), true); let schema = Schema::new(vec![field]); diff --git a/benches/from_trusted_len_iter.rs b/benches/from_trusted_len_iter.rs index a6d972d9222..6cefd9b05dc 100644 --- a/benches/from_trusted_len_iter.rs +++ b/benches/from_trusted_len_iter.rs @@ -1,9 +1,7 @@ -extern crate arrow2; +use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::{array::PrimitiveArray, bitmap::*, buffer::*}; -use criterion::{criterion_group, criterion_main, Criterion}; - fn add_benchmark(c: &mut Criterion) { let values = 0..1026; diff --git a/benches/growable.rs b/benches/growable.rs index e0a4d2426ae..ca7ac8a9045 100644 --- a/benches/growable.rs +++ b/benches/growable.rs @@ -1,17 +1,14 @@ -extern crate arrow2; +use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::{ array::growable::{Growable, GrowablePrimitive}, - datatypes::DataType, util::bench_util::create_primitive_array, }; -use criterion::{criterion_group, criterion_main, Criterion}; - fn add_benchmark(c: &mut Criterion) { let values = (0..1026).rev(); - let i32_array = create_primitive_array::(1026 * 10, DataType::Int32, 0.0); + let i32_array = create_primitive_array::(1026 * 10, 0.0); c.bench_function("growable::primitive::non_null::non_null", |b| { b.iter(|| { let mut a = GrowablePrimitive::new(vec![&i32_array], false, 1026 * 10); @@ -22,7 +19,7 @@ fn add_benchmark(c: &mut Criterion) { }) }); - let i32_array = create_primitive_array::(1026 * 10, DataType::Int32, 0.0); + let i32_array = create_primitive_array::(1026 * 10, 0.0); c.bench_function("growable::primitive::non_null::null", |b| { b.iter(|| { let mut a = GrowablePrimitive::new(vec![&i32_array], true, 1026 * 10); @@ -36,7 +33,7 @@ fn add_benchmark(c: &mut Criterion) { }) }); - let i32_array = create_primitive_array::(1026 * 10, DataType::Int32, 0.1); + let i32_array = create_primitive_array::(1026 * 10, 0.1); let values = values.collect::>(); c.bench_function("growable::primitive::null::non_null", |b| { diff --git a/benches/hash_kernel.rs b/benches/hash_kernel.rs index 7f5778044f8..bf129db6fe1 100644 --- a/benches/hash_kernel.rs +++ b/benches/hash_kernel.rs @@ -1,20 +1,17 @@ -extern crate arrow2; +use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::compute::hash::hash; -use arrow2::datatypes::DataType; use arrow2::util::bench_util::*; -use criterion::{criterion_group, criterion_main, Criterion}; - fn add_benchmark(c: &mut Criterion) { let log2_size = 10; let size = 2usize.pow(log2_size); - let arr_a = create_primitive_array::(size, DataType::Int32, 0.0); + let arr_a = create_primitive_array::(size, 0.0); c.bench_function(&format!("i32 2^{}", log2_size), |b| b.iter(|| hash(&arr_a))); - let arr_a = create_primitive_array::(size, DataType::Int64, 0.0); + let arr_a = create_primitive_array::(size, 0.0); c.bench_function(&format!("i64 2^{}", log2_size), |b| b.iter(|| hash(&arr_a))); diff --git a/benches/iter_list.rs b/benches/iter_list.rs index 87869943b85..c629cb4ba4c 100644 --- a/benches/iter_list.rs +++ b/benches/iter_list.rs @@ -1,15 +1,15 @@ +use std::iter::FromIterator; +use std::sync::Arc; + +use criterion::{criterion_group, criterion_main, Criterion}; + use arrow2::{ array::{ListArray, PrimitiveArray}, - buffer::Buffer, + bitmap::Bitmap, + buffer::{Buffer, MutableBuffer}, datatypes::DataType, }; -use arrow2::bitmap::Bitmap; -use arrow2::buffer::MutableBuffer; -use criterion::{criterion_group, criterion_main, Criterion}; -use std::iter::FromIterator; -use std::sync::Arc; - fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); diff --git a/benches/iter_utf8.rs b/benches/iter_utf8.rs index c17c817b359..44a85d24606 100644 --- a/benches/iter_utf8.rs +++ b/benches/iter_utf8.rs @@ -1,7 +1,7 @@ -use arrow2::array::Utf8Array; - use criterion::{criterion_group, criterion_main, Criterion}; +use arrow2::array::Utf8Array; + fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); diff --git a/benches/length_kernel.rs b/benches/length_kernel.rs index f9e982b9459..a5fc2ab08d4 100644 --- a/benches/length_kernel.rs +++ b/benches/length_kernel.rs @@ -15,9 +15,7 @@ // specific language governing permissions and limitations // under the License. -#[macro_use] -extern crate criterion; -use criterion::Criterion; +use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::array::*; use arrow2::compute::length::length; diff --git a/benches/sort_kernel.rs b/benches/sort_kernel.rs index 7294aa71d63..b0736a27562 100644 --- a/benches/sort_kernel.rs +++ b/benches/sort_kernel.rs @@ -15,13 +15,11 @@ // specific language governing permissions and limitations // under the License. -#[macro_use] -extern crate criterion; -use criterion::Criterion; +use criterion::{criterion_group, criterion_main, Criterion}; +use arrow2::array::*; use arrow2::compute::sort::{lexsort, sort, sort_to_indices, SortColumn, SortOptions}; use arrow2::util::bench_util::*; -use arrow2::{array::*, datatypes::*}; fn bench_lexsort(arr_a: &dyn Array, array_b: &dyn Array) { let columns = vec![ @@ -54,7 +52,7 @@ fn bench_sort_limit(arr_a: &dyn Array) { fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); - let arr_a = create_primitive_array::(size, DataType::Float32, 0.0); + let arr_a = create_primitive_array::(size, 0.0); c.bench_function(&format!("sort 2^{} f32", log2_size), |b| { b.iter(|| bench_sort(&arr_a)) @@ -64,18 +62,18 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_sort_limit(&arr_a)) }); - let arr_b = create_primitive_array_with_seed::(size, DataType::Float32, 0.0, 43); + let arr_b = create_primitive_array_with_seed::(size, 0.0, 43); c.bench_function(&format!("lexsort 2^{} f32", log2_size), |b| { b.iter(|| bench_lexsort(&arr_a, &arr_b)) }); - let arr_a = create_primitive_array::(size, DataType::Float32, 0.5); + let arr_a = create_primitive_array::(size, 0.5); c.bench_function(&format!("sort null 2^{} f32", log2_size), |b| { b.iter(|| bench_sort(&arr_a)) }); - let arr_b = create_primitive_array_with_seed::(size, DataType::Float32, 0.5, 43); + let arr_b = create_primitive_array_with_seed::(size, 0.5, 43); c.bench_function(&format!("lexsort null 2^{} f32", log2_size), |b| { b.iter(|| bench_lexsort(&arr_a, &arr_b)) }); diff --git a/benches/take_kernels.rs b/benches/take_kernels.rs index 646d0ebd3ca..0ae1e482965 100644 --- a/benches/take_kernels.rs +++ b/benches/take_kernels.rs @@ -2,17 +2,12 @@ use rand::{rngs::StdRng, Rng, SeedableRng}; use criterion::{criterion_group, criterion_main, Criterion}; +use arrow2::array::*; use arrow2::compute::take; use arrow2::util::bench_util::*; -use arrow2::{array::*, datatypes::DataType}; - -/// Returns fixed seedable RNG -pub fn seedable_rng() -> StdRng { - StdRng::seed_from_u64(42) -} fn create_random_index(size: usize, null_density: f32) -> PrimitiveArray { - let mut rng = seedable_rng(); + let mut rng = StdRng::seed_from_u64(42); (0..size) .map(|_| { if rng.gen::() > null_density { @@ -33,8 +28,8 @@ fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); - let values = create_primitive_array::(size, DataType::Int32, 0.0); - let values_nulls = create_primitive_array::(size, DataType::Int32, 0.2); + let values = create_primitive_array::(size, 0.0); + let values_nulls = create_primitive_array::(size, 0.2); let indices = create_random_index(size, 0.0); let indices_nulls = create_random_index(size, 0.5); c.bench_function(&format!("take i32 2^{}", log2_size), |b| { diff --git a/benches/unset_count.rs b/benches/unset_count.rs index a435afd7826..069fa40315b 100644 --- a/benches/unset_count.rs +++ b/benches/unset_count.rs @@ -1,7 +1,7 @@ -use arrow2::bitmap::utils::count_zeros; - use criterion::{criterion_group, criterion_main, Criterion}; +use arrow2::bitmap::utils::count_zeros; + fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); diff --git a/benches/write_csv.rs b/benches/write_csv.rs index 5427acf7c65..3afa5952aae 100644 --- a/benches/write_csv.rs +++ b/benches/write_csv.rs @@ -1,13 +1,12 @@ use std::sync::Arc; -use arrow2::util::bench_util::*; use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::array::*; -use arrow2::datatypes::*; use arrow2::error::Result; use arrow2::io::csv::write; use arrow2::record_batch::RecordBatch; +use arrow2::util::bench_util::*; fn write_batch(batch: &RecordBatch) -> Result<()> { let writer = &mut write::WriterBuilder::new().from_writer(vec![]); @@ -19,19 +18,14 @@ fn write_batch(batch: &RecordBatch) -> Result<()> { } fn make_batch(array: impl Array + 'static) -> RecordBatch { - let schema = Arc::new(Schema::new(vec![Field::new( - "a", - array.data_type().clone(), - true, - )])); - RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap() + RecordBatch::try_from_iter([("a", Arc::new(array) as Arc)]).unwrap() } fn add_benchmark(c: &mut Criterion) { (10..=18).step_by(2).for_each(|log2_size| { let size = 2usize.pow(log2_size); - let array = create_primitive_array::(size, DataType::Int32, 0.1); + let array = create_primitive_array::(size, 0.1); let batch = make_batch(array); c.bench_function(&format!("csv write i32 2^{}", log2_size), |b| { @@ -45,7 +39,7 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| write_batch(&batch)) }); - let array = create_primitive_array::(size, DataType::Float64, 0.1); + let array = create_primitive_array::(size, 0.1); let batch = make_batch(array); c.bench_function(&format!("csv write f64 2^{}", log2_size), |b| { diff --git a/benches/write_ipc.rs b/benches/write_ipc.rs index 96b17299b75..77b1ab95905 100644 --- a/benches/write_ipc.rs +++ b/benches/write_ipc.rs @@ -1,13 +1,13 @@ use std::io::Cursor; use std::sync::Arc; -use arrow2::record_batch::RecordBatch; use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::array::*; -use arrow2::datatypes::{DataType, Field, Schema}; +use arrow2::datatypes::{Field, Schema}; use arrow2::error::Result; use arrow2::io::ipc::write::*; +use arrow2::record_batch::RecordBatch; use arrow2::util::bench_util::{create_boolean_array, create_primitive_array, create_string_array}; fn write(array: &dyn Array) -> Result<()> { @@ -23,7 +23,7 @@ fn write(array: &dyn Array) -> Result<()> { fn add_benchmark(c: &mut Criterion) { (0..=10).step_by(2).for_each(|i| { - let array = &create_primitive_array::(1024 * 2usize.pow(i), DataType::Int64, 0.1); + let array = &create_primitive_array::(1024 * 2usize.pow(i), 0.1); let a = format!("write i64 2^{}", 10 + i); c.bench_function(&a, |b| b.iter(|| write(array).unwrap())); }); diff --git a/benches/write_parquet.rs b/benches/write_parquet.rs index 5ffa69d762e..df89c36d777 100644 --- a/benches/write_parquet.rs +++ b/benches/write_parquet.rs @@ -2,15 +2,15 @@ use std::io::Cursor; use criterion::{criterion_group, criterion_main, Criterion}; -use arrow2::array::*; -use arrow2::datatypes::{DataType, Field, Schema}; +use arrow2::array::{clone, Array}; use arrow2::error::Result; use arrow2::io::parquet::write::*; +use arrow2::record_batch::RecordBatch; use arrow2::util::bench_util::{create_boolean_array, create_primitive_array, create_string_array}; fn write(array: &dyn Array, encoding: Encoding) -> Result<()> { - let field = Field::new("c1", array.data_type().clone(), true); - let schema = Schema::new(vec![field]); + let batch = RecordBatch::try_from_iter([("c1", clone(array).into())])?; + let schema = batch.schema().clone(); let options = WriteOptions { write_statistics: false, @@ -18,23 +18,19 @@ fn write(array: &dyn Array, encoding: Encoding) -> Result<()> { version: Version::V1, }; - let parquet_schema = to_parquet_schema(&schema)?; - - let row_groups = std::iter::once(Result::Ok(DynIter::new(std::iter::once(Ok(DynIter::new( - std::iter::once(array_to_page( - array, - parquet_schema.columns()[0].clone(), - options, - encoding, - )), - )))))); + let row_groups = RowGroupIterator::try_new( + vec![Ok(batch)].into_iter(), + &schema, + options, + vec![encoding], + )?; let mut writer = Cursor::new(vec![]); write_file( &mut writer, row_groups, &schema, - parquet_schema, + to_parquet_schema(&schema)?, options, None, )?; @@ -43,7 +39,7 @@ fn write(array: &dyn Array, encoding: Encoding) -> Result<()> { fn add_benchmark(c: &mut Criterion) { (0..=10).step_by(2).for_each(|i| { - let array = &create_primitive_array::(1024 * 2usize.pow(i), DataType::Int64, 0.1); + let array = &create_primitive_array::(1024 * 2usize.pow(i), 0.1); let a = format!("write i64 2^{}", 10 + i); c.bench_function(&a, |b| b.iter(|| write(array, Encoding::Plain).unwrap())); }); diff --git a/src/util/bench_util.rs b/src/util/bench_util.rs index a1707bd66a3..d463f764314 100644 --- a/src/util/bench_util.rs +++ b/src/util/bench_util.rs @@ -1,27 +1,10 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Utils to make benchmarking easier +//! Utilities for benchmarking use rand::distributions::{Alphanumeric, Distribution, Standard}; use rand::{rngs::StdRng, Rng, SeedableRng}; use crate::types::NaturalDataType; -use crate::{array::*, datatypes::*, types::NativeType}; +use crate::{array::*, types::NativeType}; /// Returns fixed seedable RNG pub fn seedable_rng() -> StdRng { @@ -29,11 +12,7 @@ pub fn seedable_rng() -> StdRng { } /// Creates an random (but fixed-seeded) array of a given size and null density -pub fn create_primitive_array( - size: usize, - data_type: DataType, - null_density: f32, -) -> PrimitiveArray +pub fn create_primitive_array(size: usize, null_density: f32) -> PrimitiveArray where T: NativeType + NaturalDataType, Standard: Distribution, @@ -49,13 +28,11 @@ where } }) .collect::>() - .to(data_type) } /// Creates a new [`PrimitiveArray`] from random values with a pre-set seed. pub fn create_primitive_array_with_seed( size: usize, - data_type: DataType, null_density: f32, seed: u64, ) -> PrimitiveArray @@ -74,7 +51,6 @@ where } }) .collect::>() - .to(data_type) } /// Creates an random (but fixed-seeded) array of a given size and null density