From c31a543ed91f9150ca4fb9baa675d226759aad8f Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 10 Oct 2021 09:25:47 +0200 Subject: [PATCH 1/2] cargo fmt --- src/array/utf8/ffi.rs | 4 +- src/io/parquet/read/mod.rs | 72 ++++++++++++++----------- src/io/parquet/read/schema/convert.rs | 2 +- src/io/parquet/write/fixed_len_bytes.rs | 8 +-- src/scalar/primitive.rs | 2 +- 5 files changed, 50 insertions(+), 38 deletions(-) diff --git a/src/array/utf8/ffi.rs b/src/array/utf8/ffi.rs index c9600d7d9d9..4cc716b8e4d 100644 --- a/src/array/utf8/ffi.rs +++ b/src/array/utf8/ffi.rs @@ -33,6 +33,8 @@ impl FromFfi for Utf8Array { validity = validity.map(|x| x.slice(offset, length)) } let data_type = Self::default_data_type(); - Ok(Self::from_data_unchecked(data_type, offsets, values, validity)) + Ok(Self::from_data_unchecked( + data_type, offsets, values, validity, + )) } } diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs index f05f2669936..cb6f93ec87e 100644 --- a/src/io/parquet/read/mod.rs +++ b/src/io/parquet/read/mod.rs @@ -1,5 +1,9 @@ //! APIs to read from Parquet format. -use std::{convert::TryInto, io::{Read, Seek}, sync::Arc}; +use std::{ + convert::TryInto, + io::{Read, Seek}, + sync::Arc, +}; use futures::{AsyncRead, AsyncSeek, Stream}; pub use parquet2::{ @@ -18,7 +22,11 @@ pub use parquet2::{ types::int96_to_i64_ns, }; -use crate::{array::{Array, DictionaryKey, PrimitiveArray}, datatypes::{DataType, IntervalUnit, TimeUnit}, error::{ArrowError, Result}}; +use crate::{ + array::{Array, DictionaryKey, PrimitiveArray}, + datatypes::{DataType, IntervalUnit, TimeUnit}, + error::{ArrowError, Result}, +}; mod binary; mod boolean; @@ -205,19 +213,13 @@ pub fn page_iter_to_array< iter, data_type, metadata, )?)), Decimal(_, _) => match metadata.descriptor().type_() { - ParquetType::PrimitiveType { physical_type, ..} => match physical_type{ - PhysicalType::Int32 => primitive::iter_to_array( - iter, - metadata, - data_type, - |x: i32| x as i128, - ), - PhysicalType::Int64 => primitive::iter_to_array( - iter, - metadata, - data_type, - |x: i64| x as i128, - ), + ParquetType::PrimitiveType { physical_type, .. } => match physical_type { + PhysicalType::Int32 => { + primitive::iter_to_array(iter, metadata, data_type, |x: i32| x as i128) + } + PhysicalType::Int64 => { + primitive::iter_to_array(iter, metadata, data_type, |x: i64| x as i128) + } PhysicalType::FixedLenByteArray(n) => { if *n > 16 { Err(ArrowError::NotYetImplemented(format!( @@ -225,25 +227,33 @@ pub fn page_iter_to_array< n ))) } else { - let paddings = (0..(16-*n)).map(|_| 0u8).collect::>(); - fixed_size_binary::iter_to_array(iter, DataType::FixedSizeBinary(*n), metadata) - .map(|e|{ - let a = e.into_iter().map(|v| - v.and_then(|v1| { - [&paddings, v1].concat().try_into().map( - |pad16| i128::from_be_bytes(pad16) - ).ok() - } - ) - ).collect::>(); - Box::new(PrimitiveArray::::from(a).to(data_type)) as Box - } + let paddings = (0..(16 - *n)).map(|_| 0u8).collect::>(); + fixed_size_binary::iter_to_array( + iter, + DataType::FixedSizeBinary(*n), + metadata, ) + .map(|e| { + let a = e + .into_iter() + .map(|v| { + v.and_then(|v1| { + [&paddings, v1] + .concat() + .try_into() + .map(|pad16| i128::from_be_bytes(pad16)) + .ok() + }) + }) + .collect::>(); + Box::new(PrimitiveArray::::from(a).to(data_type)) + as Box + }) } - }, - _ => unreachable!() + } + _ => unreachable!(), }, - _ => unreachable!() + _ => unreachable!(), }, List(ref inner) => match inner.data_type() { UInt8 => primitive::iter_to_array_nested(iter, metadata, data_type, |x: i32| x as u8), diff --git a/src/io/parquet/read/schema/convert.rs b/src/io/parquet/read/schema/convert.rs index 14c7c7c2edc..45b10ca99db 100644 --- a/src/io/parquet/read/schema/convert.rs +++ b/src/io/parquet/read/schema/convert.rs @@ -167,7 +167,7 @@ pub fn from_int64( ParquetTimeUnit::MICROS(_) => DataType::Time64(TimeUnit::Microsecond), ParquetTimeUnit::NANOS(_) => DataType::Time64(TimeUnit::Nanosecond), }, - (Some(PrimitiveConvertedType::Decimal(precision,scale)), _) => { + (Some(PrimitiveConvertedType::Decimal(precision, scale)), _) => { DataType::Decimal(*precision as usize, *scale as usize) } (c, l) => { diff --git a/src/io/parquet/write/fixed_len_bytes.rs b/src/io/parquet/write/fixed_len_bytes.rs index 01069f6d0b0..0f64b2f5214 100644 --- a/src/io/parquet/write/fixed_len_bytes.rs +++ b/src/io/parquet/write/fixed_len_bytes.rs @@ -3,7 +3,7 @@ use parquet2::{ encoding::Encoding, metadata::ColumnDescriptor, page::CompressedDataPage, - statistics::{serialize_statistics, deserialize_statistics, ParquetStatistics}, + statistics::{deserialize_statistics, serialize_statistics, ParquetStatistics}, write::WriteOptions, }; @@ -98,7 +98,7 @@ pub(super) fn build_statistics( .min_by(|x, y| ord_binary(x, y)) .map(|x| x.to_vec()), }; - deserialize_statistics(pq_statistics,descriptor).map( - |e| serialize_statistics(&*e) - ).ok() + deserialize_statistics(pq_statistics, descriptor) + .map(|e| serialize_statistics(&*e)) + .ok() } diff --git a/src/scalar/primitive.rs b/src/scalar/primitive.rs index f5796fffbb7..83c78fef643 100644 --- a/src/scalar/primitive.rs +++ b/src/scalar/primitive.rs @@ -1,7 +1,7 @@ use crate::{ datatypes::DataType, - types::{NativeType, NaturalDataType}, error::ArrowError, + types::{NativeType, NaturalDataType}, }; use super::Scalar; From 577081cee2e4acb7a6bfc44d9ac531daa834ca2d Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 10 Oct 2021 10:11:33 +0200 Subject: [PATCH 2/2] clippy lints --- src/array/binary/mutable.rs | 2 +- src/array/display.rs | 6 +++--- src/array/ffi.rs | 2 ++ src/array/utf8/mutable.rs | 2 +- src/bitmap/bitmap_ops.rs | 2 +- src/bitmap/mutable.rs | 2 +- src/compute/arithmetics/decimal/add.rs | 8 +++++--- src/compute/arithmetics/decimal/div.rs | 11 +++++------ src/compute/arithmetics/decimal/mul.rs | 11 +++++------ src/compute/arithmetics/decimal/sub.rs | 8 +++++--- src/compute/cast/mod.rs | 8 +------- src/compute/sort/primitive/sort.rs | 4 ++-- src/io/avro/read/mod.rs | 4 +--- src/io/parquet/read/mod.rs | 2 +- src/io/parquet/read/primitive/basic.rs | 2 +- src/io/parquet/read/primitive/nested.rs | 2 +- src/io/parquet/read/schema/convert.rs | 4 ++-- src/io/parquet/read/statistics/fixlen.rs | 4 ++-- src/io/parquet/write/schema.rs | 2 +- src/trusted_len.rs | 4 ++++ 20 files changed, 45 insertions(+), 45 deletions(-) diff --git a/src/array/binary/mutable.rs b/src/array/binary/mutable.rs index f77109e2abf..28b1ebb08df 100644 --- a/src/array/binary/mutable.rs +++ b/src/array/binary/mutable.rs @@ -335,7 +335,7 @@ impl MutableBinaryArray { extend_from_trusted_len_iter( &mut self.offsets, &mut self.values, - &mut self.validity.as_mut().unwrap(), + self.validity.as_mut().unwrap(), iterator, ); diff --git a/src/array/display.rs b/src/array/display.rs index 915c02ea7f6..886f6c1bc79 100644 --- a/src/array/display.rs +++ b/src/array/display.rs @@ -149,7 +149,7 @@ pub fn get_value_display<'a>(array: &'a dyn Array) -> Box Strin List(_) => { let f = |x: Box| { let display = get_value_display(x.as_ref()); - let string_values = (0..x.len()).map(|i| display(i)).collect::>(); + let string_values = (0..x.len()).map(display).collect::>(); format!("[{}]", string_values.join(", ")) }; dyn_display!(array, ListArray, f) @@ -157,7 +157,7 @@ pub fn get_value_display<'a>(array: &'a dyn Array) -> Box Strin FixedSizeList(_, _) => { let f = |x: Box| { let display = get_value_display(x.as_ref()); - let string_values = (0..x.len()).map(|i| display(i)).collect::>(); + let string_values = (0..x.len()).map(display).collect::>(); format!("[{}]", string_values.join(", ")) }; dyn_display!(array, FixedSizeListArray, f) @@ -165,7 +165,7 @@ pub fn get_value_display<'a>(array: &'a dyn Array) -> Box Strin LargeList(_) => { let f = |x: Box| { let display = get_value_display(x.as_ref()); - let string_values = (0..x.len()).map(|i| display(i)).collect::>(); + let string_values = (0..x.len()).map(display).collect::>(); format!("[{}]", string_values.join(", ")) }; dyn_display!(array, ListArray, f) diff --git a/src/array/ffi.rs b/src/array/ffi.rs index 0b32adaf4e1..176b0966ff5 100644 --- a/src/array/ffi.rs +++ b/src/array/ffi.rs @@ -7,6 +7,8 @@ use crate::error::Result; /// Trait describing how a struct presents itself to the /// [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI). +/// Safety: +/// Implementing this trait incorrect will lead to UB pub unsafe trait ToFfi { /// The pointers to the buffers. fn buffers(&self) -> Vec>>; diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index bf7c7f9d68a..4503265e3d0 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -306,7 +306,7 @@ impl MutableUtf8Array { extend_from_trusted_len_iter( &mut self.offsets, &mut self.values, - &mut self.validity.as_mut().unwrap(), + self.validity.as_mut().unwrap(), iterator, ); diff --git a/src/bitmap/bitmap_ops.rs b/src/bitmap/bitmap_ops.rs index 88a79993611..8a95b5f10c7 100644 --- a/src/bitmap/bitmap_ops.rs +++ b/src/bitmap/bitmap_ops.rs @@ -110,7 +110,7 @@ where { let rem = op(iter.remainder()); - let iterator = iter.map(|left| op(left)).chain(std::iter::once(rem)); + let iterator = iter.map(op).chain(std::iter::once(rem)); let buffer = MutableBuffer::from_chunk_iter(iterator); diff --git a/src/bitmap/mutable.rs b/src/bitmap/mutable.rs index 798ac406859..58631abde76 100644 --- a/src/bitmap/mutable.rs +++ b/src/bitmap/mutable.rs @@ -206,7 +206,7 @@ impl MutableBitmap { /// Panics iff `index >= self.len()`. #[inline] pub fn set(&mut self, index: usize, value: bool) { - set_bit(&mut self.buffer.as_mut_slice(), index, value) + set_bit(self.buffer.as_mut_slice(), index, value) } /// Shrinks the capacity of the [`MutableBitmap`] to fit its current length. diff --git a/src/compute/arithmetics/decimal/add.rs b/src/compute/arithmetics/decimal/add.rs index 76a17a09afa..9877af9ca7a 100644 --- a/src/compute/arithmetics/decimal/add.rs +++ b/src/compute/arithmetics/decimal/add.rs @@ -65,9 +65,11 @@ pub fn add(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Result max_value(*lhs_p) { - panic!("Overflow in addition presented for precision {}", lhs_p); - } + assert!( + !(res.abs() > max_value(*lhs_p)), + "Overflow in addition presented for precision {}", + lhs_p + ); res }; diff --git a/src/compute/arithmetics/decimal/div.rs b/src/compute/arithmetics/decimal/div.rs index 1a88881bace..dde60d3a1ab 100644 --- a/src/compute/arithmetics/decimal/div.rs +++ b/src/compute/arithmetics/decimal/div.rs @@ -80,12 +80,11 @@ pub fn div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Result max_value(*lhs_p) { - panic!( - "Overflow in multiplication presented for precision {}", - lhs_p - ); - } + assert!( + !(res.abs() > max_value(*lhs_p)), + "Overflow in multiplication presented for precision {}", + lhs_p + ); res }; diff --git a/src/compute/arithmetics/decimal/mul.rs b/src/compute/arithmetics/decimal/mul.rs index 8ea10b49f69..8c9fa321fb4 100644 --- a/src/compute/arithmetics/decimal/mul.rs +++ b/src/compute/arithmetics/decimal/mul.rs @@ -78,12 +78,11 @@ pub fn mul(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Result max_value(*lhs_p) { - panic!( - "Overflow in multiplication presented for precision {}", - lhs_p - ); - } + assert!( + !(res.abs() > max_value(*lhs_p)), + "Overflow in multiplication presented for precision {}", + lhs_p + ); res }; diff --git a/src/compute/arithmetics/decimal/sub.rs b/src/compute/arithmetics/decimal/sub.rs index 8a692c7339d..7d97e019dc0 100644 --- a/src/compute/arithmetics/decimal/sub.rs +++ b/src/compute/arithmetics/decimal/sub.rs @@ -64,9 +64,11 @@ pub fn sub(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> Result max_value(*lhs_p) { - panic!("Overflow in subtract presented for precision {}", lhs_p); - } + assert!( + !(res.abs() > max_value(*lhs_p)), + "Overflow in subtract presented for precision {}", + lhs_p + ); res }; diff --git a/src/compute/cast/mod.rs b/src/compute/cast/mod.rs index 73282130424..95975c42e26 100644 --- a/src/compute/cast/mod.rs +++ b/src/compute/cast/mod.rs @@ -20,7 +20,7 @@ pub use primitive_to::*; pub use utf8_to::*; /// options defining how Cast kernels behave -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Default)] struct CastOptions { /// default to false /// whether an overflowing cast should be converted to `None` (default), or be wrapped (i.e. `256i16 as u8 = 0` vectorized). @@ -28,12 +28,6 @@ struct CastOptions { wrapped: bool, } -impl Default for CastOptions { - fn default() -> Self { - Self { wrapped: false } - } -} - impl CastOptions { fn with_wrapped(&self, v: bool) -> Self { let mut option = *self; diff --git a/src/compute/sort/primitive/sort.rs b/src/compute/sort/primitive/sort.rs index de6aa58867b..144b96cc19a 100644 --- a/src/compute/sort/primitive/sort.rs +++ b/src/compute/sort/primitive/sort.rs @@ -112,7 +112,7 @@ where // sort all non-null values sort_values( - &mut buffer.as_mut_slice(), + buffer.as_mut_slice(), cmp, options.descending, limit - validity.null_count(), @@ -153,7 +153,7 @@ where let mut buffer = MutableBuffer::::new(); buffer.extend_from_slice(values); - sort_values(&mut buffer.as_mut_slice(), cmp, options.descending, limit); + sort_values(buffer.as_mut_slice(), cmp, options.descending, limit); buffer.truncate(limit); buffer.shrink_to_fit(); diff --git a/src/io/avro/read/mod.rs b/src/io/avro/read/mod.rs index 02aa6a7dd4b..19440ee1559 100644 --- a/src/io/avro/read/mod.rs +++ b/src/io/avro/read/mod.rs @@ -63,9 +63,7 @@ fn read_block(reader: &mut R, buf: &mut Vec, file_marker: [u8; 16]) let mut marker = [0u8; 16]; reader.read_exact(&mut marker)?; - if marker != file_marker { - panic!(); - } + assert!(!(marker != file_marker)); Ok(rows) } diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs index cb6f93ec87e..bb05a744633 100644 --- a/src/io/parquet/read/mod.rs +++ b/src/io/parquet/read/mod.rs @@ -241,7 +241,7 @@ pub fn page_iter_to_array< [&paddings, v1] .concat() .try_into() - .map(|pad16| i128::from_be_bytes(pad16)) + .map(i128::from_be_bytes) .ok() }) }) diff --git a/src/io/parquet/read/primitive/basic.rs b/src/io/parquet/read/primitive/basic.rs index a783f001a4a..6662b247304 100644 --- a/src/io/parquet/read/primitive/basic.rs +++ b/src/io/parquet/read/primitive/basic.rs @@ -160,7 +160,7 @@ fn read_required( assert_eq!(values_buffer.len(), additional * std::mem::size_of::()); let iterator = ExactChunksIter::::new(values_buffer); - let iterator = iterator.map(|value| op(value)); + let iterator = iterator.map(op); values.extend_from_trusted_len_iter(iterator); } diff --git a/src/io/parquet/read/primitive/nested.rs b/src/io/parquet/read/primitive/nested.rs index fab3646bd8f..237f23ee83e 100644 --- a/src/io/parquet/read/primitive/nested.rs +++ b/src/io/parquet/read/primitive/nested.rs @@ -45,7 +45,7 @@ where A: ArrowNativeType, F: Fn(T) -> A, { - let iterator = new_values.map(|v| op(v)); + let iterator = new_values.map(op); values.extend_from_trusted_len_iter(iterator); } diff --git a/src/io/parquet/read/schema/convert.rs b/src/io/parquet/read/schema/convert.rs index 45b10ca99db..3b7502f1b0d 100644 --- a/src/io/parquet/read/schema/convert.rs +++ b/src/io/parquet/read/schema/convert.rs @@ -49,7 +49,7 @@ pub fn parquet_to_arrow_schema( schema .fields() .iter() - .map(|t| to_field(t)) + .map(to_field) .filter_map(|x| x.transpose()) .collect::>>() .map(|fields| Schema::new_from(fields, metadata)) @@ -284,7 +284,7 @@ fn to_group_type_inner( fn to_struct(fields: &[ParquetType]) -> Result> { fields .iter() - .map(|field| to_field(field)) + .map(to_field) .collect::>>>() .map(|result| result.into_iter().flatten().collect::>()) .map(|fields| { diff --git a/src/io/parquet/read/statistics/fixlen.rs b/src/io/parquet/read/statistics/fixlen.rs index f550b3b63ad..62413533c79 100644 --- a/src/io/parquet/read/statistics/fixlen.rs +++ b/src/io/parquet/read/statistics/fixlen.rs @@ -63,7 +63,7 @@ impl TryFrom<(&ParquetFixedLenStatistics, DataType)> for PrimitiveStatistics for PrimitiveStatistics Result { // recursively convert children to types/nodes let fields = fields .iter() - .map(|f| to_parquet_type(f)) + .map(to_parquet_type) .collect::>>()?; Ok(ParquetType::try_from_group( name, repetition, None, None, fields, None, diff --git a/src/trusted_len.rs b/src/trusted_len.rs index 340d6cde828..98ff458e56a 100644 --- a/src/trusted_len.rs +++ b/src/trusted_len.rs @@ -5,6 +5,10 @@ use std::slice::Iter; /// A trait denoting Rusts' unstable [TrustedLen](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). /// This is re-defined here and implemented for some iterators until `std::iter::TrustedLen` /// is stabilized. +/// +/// # Safety +/// This trait must only be implemented when the contract is upheld. +/// Consumers of this trait must inspect Iterator::size_hint()’s upper bound. pub unsafe trait TrustedLen: Iterator {} unsafe impl TrustedLen for Iter<'_, T> {}