diff --git a/src/io/avro/write/serialize.rs b/src/io/avro/write/serialize.rs index 5a4e7d52541..fc01d040ae6 100644 --- a/src/io/avro/write/serialize.rs +++ b/src/io/avro/write/serialize.rs @@ -1,6 +1,7 @@ use avro_schema::Schema as AvroSchema; -use crate::datatypes::{PhysicalType, PrimitiveType}; +use crate::datatypes::{IntervalUnit, PhysicalType, PrimitiveType}; +use crate::types::months_days_ns; use crate::{array::*, datatypes::DataType}; use super::super::super::iterator::*; @@ -179,12 +180,52 @@ pub fn new_serializer<'a>(array: &'a dyn Array, schema: &AvroSchema) -> BoxSeria vec![], )) } - _ => todo!(), + (PhysicalType::Primitive(PrimitiveType::MonthDayNano), AvroSchema::Fixed(_)) => { + let values = array + .as_any() + .downcast_ref::>() + .unwrap(); + Box::new(BufStreamingIterator::new( + values.values().iter(), + interval_write, + vec![], + )) + } + (PhysicalType::Primitive(PrimitiveType::MonthDayNano), AvroSchema::Union(_)) => { + let values = array + .as_any() + .downcast_ref::>() + .unwrap(); + Box::new(BufStreamingIterator::new( + values.iter(), + |x, buf| { + util::zigzag_encode(x.is_some() as i64, buf).unwrap(); + if let Some(x) = x { + interval_write(x, buf) + } + }, + vec![], + )) + } + (a, b) => todo!("{:?} -> {:?} not supported", a, b), } } /// Whether [`new_serializer`] supports `data_type`. pub fn can_serialize(data_type: &DataType) -> bool { use DataType::*; - matches!(data_type, Boolean | Int32 | Int64 | Utf8 | Binary) + matches!( + data_type, + Boolean | Int32 | Int64 | Utf8 | Binary | Interval(IntervalUnit::MonthDayNano) + ) +} + +#[inline] +fn interval_write(x: &months_days_ns, buf: &mut Vec) { + // https://avro.apache.org/docs/current/spec.html#Duration + // 12 bytes, months, days, millis in LE + buf.reserve(12); + buf.extend(x.months().to_le_bytes()); + buf.extend(x.days().to_le_bytes()); + buf.extend(((x.ns() / 1_000_000) as i32).to_le_bytes()); } diff --git a/tests/it/io/avro/write.rs b/tests/it/io/avro/write.rs index 01c33bbd77c..bc29b969681 100644 --- a/tests/it/io/avro/write.rs +++ b/tests/it/io/avro/write.rs @@ -5,6 +5,7 @@ use arrow2::chunk::Chunk; use arrow2::datatypes::*; use arrow2::error::Result; use arrow2::io::avro::write; +use arrow2::types::months_days_ns; fn schema() -> Schema { Schema::new(vec![ @@ -16,6 +17,7 @@ fn schema() -> Schema { Field::new("e", DataType::Float64, false), Field::new("f", DataType::Boolean, false), Field::new("g", DataType::Utf8, true), + Field::new("h", DataType::Interval(IntervalUnit::MonthDayNano), true), ]) } @@ -29,6 +31,10 @@ fn data() -> Chunk> { Arc::new(PrimitiveArray::::from_slice([1.0, 2.0])) as Arc, Arc::new(BooleanArray::from_slice([true, false])) as Arc, Arc::new(Utf8Array::::from([Some("foo"), None])) as Arc, + Arc::new(PrimitiveArray::::from([ + Some(months_days_ns::new(1, 1, 10 * 1_000_000)), // 10 millis + None, + ])) as Arc, ]; Chunk::try_new(columns).unwrap()