Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added support for decimal 256 #1194

Merged
merged 2 commits into from Sep 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Expand Up @@ -22,6 +22,9 @@ dyn-clone = "1"
bytemuck = { version = "1", features = ["derive"] }
chrono = { version = "0.4", default_features = false, features = ["std"] }

# for decimal i256
ethnum = "1"

# We need to Hash values before sending them to an hasher. This
# crate provides HashMap that assumes pre-hashed values.
hash_hasher = "^2.0.3"
Expand Down
13 changes: 13 additions & 0 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Expand Up @@ -116,6 +116,19 @@ def test_decimal_roundtrip(self):
b = arrow_pyarrow_integration_testing.round_trip_array(a)
self.assertEqual(a, b)

def test_decimal256_roundtrip(self):
"""
Python -> Rust -> Python
"""
data = [
round(decimal.Decimal(722.82), 2),
round(decimal.Decimal(-934.11), 2),
None,
]
a = pyarrow.array(data, pyarrow.decimal256(5, 2))
b = arrow_pyarrow_integration_testing.round_trip_array(a)
self.assertEqual(a, b)

def test_list_array(self):
"""
Python -> Rust -> Python
Expand Down
3 changes: 2 additions & 1 deletion src/array/mod.rs
Expand Up @@ -206,13 +206,14 @@ macro_rules! with_match_primitive_type {(
) => ({
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::{days_ms, months_days_ns, f16};
use crate::types::{days_ms, months_days_ns, f16, i256};
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => __with_ty__! { days_ms },
MonthDayNano => __with_ty__! { months_days_ns },
UInt8 => __with_ty__! { u8 },
Expand Down
19 changes: 15 additions & 4 deletions src/array/primitive/fmt.rs
Expand Up @@ -2,7 +2,7 @@ use std::fmt::{Debug, Formatter, Result, Write};

use crate::array::Array;
use crate::datatypes::{IntervalUnit, TimeUnit};
use crate::types::{days_ms, months_days_ns};
use crate::types::{days_ms, i256, months_days_ns};

use super::PrimitiveArray;
use crate::array::fmt::write_vec;
Expand Down Expand Up @@ -118,13 +118,24 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
Decimal(_, scale) => {
// The number 999.99 has a precision of 5 and scale of 2
let scale = *scale as u32;
let display = move |x| {
let base = x / 10i128.pow(scale);
let decimals = x - base * 10i128.pow(scale);
let factor = 10i128.pow(scale);
let display = move |x: i128| {
let base = x / factor;
let decimals = (x - base * factor).abs();
format!("{}.{}", base, decimals)
};
dyn_primitive!(array, i128, display)
}
Decimal256(_, scale) => {
let scale = *scale as u32;
let factor = (ethnum::I256::ONE * 10).pow(scale);
let display = move |x: i256| {
let base = x.0 / factor;
let decimals = (x.0 - base * factor).abs();
format!("{}.{}", base, decimals)
};
dyn_primitive!(array, i256, display)
}
_ => unreachable!(),
}
}
Expand Down
6 changes: 5 additions & 1 deletion src/array/primitive/mod.rs
Expand Up @@ -7,7 +7,7 @@ use crate::{
datatypes::*,
error::Error,
trusted_len::TrustedLen,
types::{days_ms, f16, months_days_ns, NativeType},
types::{days_ms, f16, i256, months_days_ns, NativeType},
};

use super::Array;
Expand Down Expand Up @@ -475,6 +475,8 @@ pub type Int32Array = PrimitiveArray<i32>;
pub type Int64Array = PrimitiveArray<i64>;
/// A type definition [`PrimitiveArray`] for `i128`
pub type Int128Array = PrimitiveArray<i128>;
/// A type definition [`PrimitiveArray`] for `i256`
pub type Int256Array = PrimitiveArray<i256>;
/// A type definition [`PrimitiveArray`] for [`days_ms`]
pub type DaysMsArray = PrimitiveArray<days_ms>;
/// A type definition [`PrimitiveArray`] for [`months_days_ns`]
Expand Down Expand Up @@ -504,6 +506,8 @@ pub type Int32Vec = MutablePrimitiveArray<i32>;
pub type Int64Vec = MutablePrimitiveArray<i64>;
/// A type definition [`MutablePrimitiveArray`] for `i128`
pub type Int128Vec = MutablePrimitiveArray<i128>;
/// A type definition [`MutablePrimitiveArray`] for `i256`
pub type Int256Vec = MutablePrimitiveArray<i256>;
/// A type definition [`MutablePrimitiveArray`] for [`days_ms`]
pub type DaysMsVec = MutablePrimitiveArray<days_ms>;
/// A type definition [`MutablePrimitiveArray`] for [`months_days_ns`]
Expand Down
3 changes: 2 additions & 1 deletion src/compute/arithmetics/mod.rs
Expand Up @@ -402,13 +402,14 @@ macro_rules! with_match_negatable {(
) => ({
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::{days_ms, months_days_ns};
use crate::types::{days_ms, months_days_ns, i256};
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => __with_ty__! { days_ms },
MonthDayNano => __with_ty__! { months_days_ns },
UInt8 | UInt16 | UInt32 | UInt64 | Float16 => todo!(),
Expand Down
5 changes: 4 additions & 1 deletion src/compute/comparison/mod.rs
Expand Up @@ -69,12 +69,14 @@ macro_rules! match_eq_ord {(
) => ({
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::i256;
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => todo!(),
MonthDayNano => todo!(),
UInt8 => __with_ty__! { u8 },
Expand All @@ -92,13 +94,14 @@ macro_rules! match_eq {(
) => ({
macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )}
use crate::datatypes::PrimitiveType::*;
use crate::types::{days_ms, months_days_ns, f16};
use crate::types::{days_ms, months_days_ns, f16, i256};
match $key_type {
Int8 => __with_ty__! { i8 },
Int16 => __with_ty__! { i16 },
Int32 => __with_ty__! { i32 },
Int64 => __with_ty__! { i64 },
Int128 => __with_ty__! { i128 },
Int256 => __with_ty__! { i256 },
DaysMs => __with_ty__! { days_ms },
MonthDayNano => __with_ty__! { months_days_ns },
UInt8 => __with_ty__! { u8 },
Expand Down
3 changes: 2 additions & 1 deletion src/compute/comparison/simd/native.rs
@@ -1,7 +1,7 @@
use std::convert::TryInto;

use super::{set, Simd8, Simd8Lanes, Simd8PartialEq, Simd8PartialOrd};
use crate::types::{days_ms, f16, months_days_ns};
use crate::types::{days_ms, f16, i256, months_days_ns};

simd8_native_all!(u8);
simd8_native_all!(u16);
Expand All @@ -11,6 +11,7 @@ simd8_native_all!(i8);
simd8_native_all!(i16);
simd8_native_all!(i32);
simd8_native_all!(i128);
simd8_native_all!(i256);
simd8_native_all!(i64);
simd8_native!(f16);
simd8_native_partial_eq!(f16);
Expand Down
3 changes: 2 additions & 1 deletion src/compute/comparison/simd/packed.rs
Expand Up @@ -2,7 +2,7 @@ use std::convert::TryInto;
use std::simd::{SimdPartialEq, SimdPartialOrd, ToBitMask};

use crate::types::simd::*;
use crate::types::{days_ms, f16, months_days_ns};
use crate::types::{days_ms, f16, i256, months_days_ns};

use super::*;

Expand Down Expand Up @@ -71,6 +71,7 @@ simd8!(i16, i16x8);
simd8!(i32, i32x8);
simd8!(i64, i64x8);
simd8_native_all!(i128);
simd8_native_all!(i256);
simd8_native!(f16);
simd8_native_partial_eq!(f16);
simd8!(f32, f32x8);
Expand Down
4 changes: 4 additions & 0 deletions src/datatypes/mod.rs
Expand Up @@ -154,6 +154,8 @@ pub enum DataType {
/// scale is the number of decimal places.
/// The number 999.99 has a precision of 5 and scale of 2.
Decimal(usize, usize),
/// Decimal backed by 256 bits
Decimal256(usize, usize),
/// Extension type.
Extension(String, Box<DataType>, Option<String>),
}
Expand Down Expand Up @@ -233,6 +235,7 @@ impl DataType {
PhysicalType::Primitive(PrimitiveType::Int64)
}
Decimal(_, _) => PhysicalType::Primitive(PrimitiveType::Int128),
Decimal256(_, _) => PhysicalType::Primitive(PrimitiveType::Int256),
UInt8 => PhysicalType::Primitive(PrimitiveType::UInt8),
UInt16 => PhysicalType::Primitive(PrimitiveType::UInt16),
UInt32 => PhysicalType::Primitive(PrimitiveType::UInt32),
Expand Down Expand Up @@ -299,6 +302,7 @@ impl From<PrimitiveType> for DataType {
PrimitiveType::UInt32 => DataType::UInt32,
PrimitiveType::UInt64 => DataType::UInt64,
PrimitiveType::Int128 => DataType::Decimal(32, 32),
PrimitiveType::Int256 => DataType::Decimal256(32, 32),
PrimitiveType::Float16 => DataType::Float16,
PrimitiveType::Float32 => DataType::Float32,
PrimitiveType::Float64 => DataType::Float64,
Expand Down
16 changes: 13 additions & 3 deletions src/ffi/schema.rs
Expand Up @@ -331,9 +331,18 @@ unsafe fn to_data_type(schema: &ArrowSchema) -> Result<DataType> {
"Decimal bit width is not a valid integer".to_string(),
)
})?;
if bit_width != 128 {
return Err(Error::OutOfSpec(
"Decimal256 is not supported".to_string(),
if bit_width == 256 {
return Ok(DataType::Decimal256(
precision_raw.parse::<usize>().map_err(|_| {
Error::OutOfSpec(
"Decimal precision is not a valid integer".to_string(),
)
})?,
scale_raw.parse::<usize>().map_err(|_| {
Error::OutOfSpec(
"Decimal scale is not a valid integer".to_string(),
)
})?,
));
}
(precision_raw, scale_raw)
Expand Down Expand Up @@ -438,6 +447,7 @@ fn to_format(data_type: &DataType) -> String {
)
}
DataType::Decimal(precision, scale) => format!("d:{},{}", precision, scale),
DataType::Decimal256(precision, scale) => format!("d:{},{},256", precision, scale),
DataType::List(_) => "+l".to_string(),
DataType::LargeList(_) => "+L".to_string(),
DataType::Struct(_) => "+s".to_string(),
Expand Down
29 changes: 19 additions & 10 deletions src/io/ipc/read/schema.rs
Expand Up @@ -198,16 +198,25 @@ fn get_data_type(
(DataType::Duration(time_unit), IpcField::default())
}
Decimal(decimal) => {
let data_type = DataType::Decimal(
decimal
.precision()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?,
decimal
.scale()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?,
);
let bit_width: usize = decimal
.bit_width()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?;
let precision: usize = decimal
.precision()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?;
let scale: usize = decimal
.scale()?
.try_into()
.map_err(|_| Error::from(OutOfSpecKind::NegativeFooterLength))?;

let data_type = match bit_width {
128 => DataType::Decimal(precision, scale),
256 => DataType::Decimal256(precision, scale),
_ => return Err(Error::from(OutOfSpecKind::NegativeFooterLength)),
};

(data_type, IpcField::default())
}
List(_) => {
Expand Down
8 changes: 7 additions & 1 deletion src/io/ipc/write/schema.rs
Expand Up @@ -197,6 +197,11 @@ fn serialize_type(data_type: &DataType) -> arrow_format::ipc::Type {
scale: *scale as i32,
bit_width: 128,
})),
Decimal256(precision, scale) => ipc::Type::Decimal(Box::new(ipc::Decimal {
precision: *precision as i32,
scale: *scale as i32,
bit_width: 256,
})),
Binary => ipc::Type::Binary(Box::new(ipc::Binary {})),
LargeBinary => ipc::Type::LargeBinary(Box::new(ipc::LargeBinary {})),
Utf8 => ipc::Type::Utf8(Box::new(ipc::Utf8 {})),
Expand Down Expand Up @@ -281,7 +286,8 @@ fn serialize_children(data_type: &DataType, ipc_field: &IpcField) -> Vec<arrow_f
| LargeBinary
| Utf8
| LargeUtf8
| Decimal(_, _) => vec![],
| Decimal(_, _)
| Decimal256(_, _) => vec![],
FixedSizeList(inner, _) | LargeList(inner) | List(inner) | Map(inner, _) => {
vec![serialize_field(inner, &ipc_field.fields[0])]
}
Expand Down
21 changes: 20 additions & 1 deletion src/io/json_integration/read/array.rs
Expand Up @@ -10,7 +10,7 @@ use crate::{
datatypes::{DataType, PhysicalType, PrimitiveType, Schema},
error::{Error, Result},
io::ipc::IpcField,
types::{days_ms, months_days_ns, NativeType},
types::{days_ms, i256, months_days_ns, NativeType},
};

use super::super::{ArrowJsonBatch, ArrowJsonColumn, ArrowJsonDictionaryBatch};
Expand Down Expand Up @@ -125,6 +125,24 @@ fn to_decimal(json_col: &ArrowJsonColumn, data_type: DataType) -> PrimitiveArray
PrimitiveArray::<i128>::new(data_type, values, validity)
}

fn to_decimal256(json_col: &ArrowJsonColumn, data_type: DataType) -> PrimitiveArray<i256> {
let validity = to_validity(&json_col.validity);
let values = json_col
.data
.as_ref()
.unwrap()
.iter()
.map(|value| match value {
Value::String(x) => i256(x.parse::<ethnum::I256>().unwrap()),
_ => {
panic!()
}
})
.collect();

PrimitiveArray::<i256>::new(data_type, values, validity)
}

fn to_primitive<T: NativeType + NumCast>(
json_col: &ArrowJsonColumn,
data_type: DataType,
Expand Down Expand Up @@ -280,6 +298,7 @@ pub fn to_array(
Primitive(PrimitiveType::Int32) => Ok(Box::new(to_primitive::<i32>(json_col, data_type))),
Primitive(PrimitiveType::Int64) => Ok(Box::new(to_primitive::<i64>(json_col, data_type))),
Primitive(PrimitiveType::Int128) => Ok(Box::new(to_decimal(json_col, data_type))),
Primitive(PrimitiveType::Int256) => Ok(Box::new(to_decimal256(json_col, data_type))),
Primitive(PrimitiveType::DaysMs) => Ok(Box::new(to_primitive_days_ms(json_col, data_type))),
Primitive(PrimitiveType::MonthDayNano) => {
Ok(Box::new(to_primitive_months_days_ns(json_col, data_type)))
Expand Down
32 changes: 20 additions & 12 deletions src/io/json_integration/read/schema.rs
Expand Up @@ -170,20 +170,28 @@ fn to_data_type(item: &Value, mut children: Vec<Field>) -> Result<DataType> {
"largeutf8" => LargeUtf8,
"decimal" => {
// return a list with any type as its child isn't defined in the map
let precision = match item.get("precision") {
Some(p) => Ok(p.as_u64().unwrap() as usize),
None => Err(Error::OutOfSpec(
"Expecting a precision for decimal".to_string(),
)),
};
let scale = match item.get("scale") {
Some(s) => Ok(s.as_u64().unwrap() as usize),
_ => Err(Error::OutOfSpec(
"Expecting a scale for decimal".to_string(),
)),
let precision = item
.get("precision")
.ok_or_else(|| Error::OutOfSpec("Expecting a precision for decimal".to_string()))?
.as_u64()
.unwrap() as usize;

let scale = item
.get("scale")
.ok_or_else(|| Error::OutOfSpec("Expecting a scale for decimal".to_string()))?
.as_u64()
.unwrap() as usize;

let bit_width = match item.get("bitWidth") {
Some(s) => s.as_u64().unwrap() as usize,
None => 128,
};

DataType::Decimal(precision?, scale?)
match bit_width {
128 => DataType::Decimal(precision, scale),
256 => DataType::Decimal256(precision, scale),
_ => todo!(),
}
}
"floatingpoint" => match item.get("precision") {
Some(p) if p == "HALF" => DataType::Float16,
Expand Down
3 changes: 3 additions & 0 deletions src/io/json_integration/write/schema.rs
Expand Up @@ -89,6 +89,9 @@ fn serialize_data_type(data_type: &DataType) -> Value {
DataType::Decimal(precision, scale) => {
json!({"name": "decimal", "precision": precision, "scale": scale})
}
DataType::Decimal256(precision, scale) => {
json!({"name": "decimal", "precision": precision, "scale": scale, "bit_width": 256})
}
DataType::Extension(_, inner_data_type, _) => serialize_data_type(inner_data_type),
}
}
Expand Down