Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added FFI consume of dict.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 10, 2021
1 parent 7b95093 commit 522c7c7
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 17 deletions.
39 changes: 39 additions & 0 deletions src/array/dictionary/ffi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
use crate::{
array::{FromFfi, PrimitiveArray, ToFfi},
error::Result,
ffi,
};

use super::{DictionaryArray, DictionaryKey};

unsafe impl<K: DictionaryKey> ToFfi for DictionaryArray<K> {
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
self.keys.buffers()
}

#[inline]
fn offset(&self) -> usize {
self.offset
}
}

unsafe impl<K: DictionaryKey, A: ffi::ArrowArrayRef> FromFfi<A> for DictionaryArray<K> {
fn try_from_ffi(array: A) -> Result<Self> {
// keys: similar to PrimitiveArray, but the datatype is the inner one
let length = array.array().len();
let offset = array.array().offset();
let mut validity = unsafe { array.validity() }?;
let mut values = unsafe { array.buffer::<K>(0) }?;

if offset > 0 {
values = values.slice(offset, length);
validity = validity.map(|x| x.slice(offset, length))
}
let keys = PrimitiveArray::<K>::from_data(K::DATA_TYPE, values, validity);
// values
let values = array.dictionary()?.unwrap();
let values = ffi::try_from(values)?.into();

Ok(DictionaryArray::<K>::from_data(keys, values))
}
}
14 changes: 2 additions & 12 deletions src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ use crate::{
types::{NativeType, NaturalDataType},
};

mod ffi;
mod iterator;
mod mutable;
pub use iterator::*;
pub use mutable::*;

use super::{ffi::ToFfi, new_empty_array, primitive::PrimitiveArray, Array};
use super::{new_empty_array, primitive::PrimitiveArray, Array};

/// Trait denoting [`NativeType`]s that can be used as keys of a dictionary.
pub trait DictionaryKey: NativeType + NaturalDataType + num::NumCast + num::FromPrimitive {}
Expand Down Expand Up @@ -143,14 +144,3 @@ where
write!(f, "}}")
}
}

unsafe impl<K: DictionaryKey> ToFfi for DictionaryArray<K> {
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
vec![self.keys.validity().as_ref().map(|x| x.as_ptr())]
}

#[inline]
fn offset(&self) -> usize {
self.offset
}
}
11 changes: 10 additions & 1 deletion src/ffi/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::array::{BooleanArray, FromFfi};
use crate::error::{ArrowError, Result};
use crate::types::days_ms;
use crate::{
array::{Array, BinaryArray, ListArray, PrimitiveArray, StructArray, Utf8Array},
array::*,
datatypes::{DataType, IntervalUnit},
};

Expand Down Expand Up @@ -66,6 +66,15 @@ pub fn try_from<A: ArrowArrayRef>(array: A) -> Result<Box<dyn Array>> {
DataType::List(_) => Box::new(ListArray::<i32>::try_from_ffi(array)?),
DataType::LargeList(_) => Box::new(ListArray::<i64>::try_from_ffi(array)?),
DataType::Struct(_) => Box::new(StructArray::try_from_ffi(array)?),
DataType::Dictionary(keys, _) => match keys.as_ref() {
DataType::Int64 => Box::new(DictionaryArray::<i64>::try_from_ffi(array)?),
other => {
return Err(ArrowError::NotYetImplemented(format!(
"Reading dictionary of keys \"{}\" is not yet supported.",
other
)))
}
},
data_type => {
return Err(ArrowError::NotYetImplemented(format!(
"Reading DataType \"{}\" is not yet supported.",
Expand Down
20 changes: 19 additions & 1 deletion src/ffi/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ fn create_child(
) -> Result<ArrowArrayChild<'static>> {
assert!(index < array.n_children as usize);
assert!(!array.children.is_null());
assert!(!array.children.is_null());
unsafe {
let arr_ptr = *array.children.add(index);
let schema_ptr = schema.child(index);
Expand All @@ -290,6 +289,21 @@ fn create_child(
}
}

fn create_dictionary(
array: &Ffi_ArrowArray,
schema: &Ffi_ArrowSchema,
parent: Arc<ArrowArray>,
) -> Result<Option<ArrowArrayChild<'static>>> {
let schema = schema.dictionary();
if let Some(schema) = schema {
assert!(!array.dictionary.is_null());
let array = unsafe { &*array.dictionary };
Ok(Some(ArrowArrayChild::from_raw(array, schema, parent)))
} else {
Ok(None)
}
}

pub trait ArrowArrayRef {
fn deallocation(&self) -> Deallocation {
Deallocation::Foreign(self.parent().clone())
Expand Down Expand Up @@ -334,6 +348,10 @@ pub trait ArrowArrayRef {
create_child(self.array(), self.schema(), self.parent().clone(), index)
}

fn dictionary(&self) -> Result<Option<ArrowArrayChild>> {
create_dictionary(self.array(), self.schema(), self.parent().clone())
}

fn parent(&self) -> &Arc<ArrowArray>;
fn array(&self) -> &Ffi_ArrowArray;
fn schema(&self) -> &Ffi_ArrowSchema;
Expand Down
24 changes: 21 additions & 3 deletions src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ impl Ffi_ArrowSchema {
unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() }
}

pub fn dictionary(&self) -> Option<&'static Self> {
if self.dictionary.is_null() {
return None;
};
Some(unsafe { self.dictionary.as_ref().unwrap() })
}

pub fn nullable(&self) -> bool {
(self.flags / 2) & 1 == 1
}
Expand All @@ -146,7 +153,19 @@ impl Drop for Ffi_ArrowSchema {
}

pub fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
let data_type = match schema.format() {
let dictionary = schema.dictionary();
let data_type = if let Some(dictionary) = dictionary {
let indices_data_type = to_data_type(schema)?;
let values_data_type = to_data_type(dictionary)?;
DataType::Dictionary(Box::new(indices_data_type), Box::new(values_data_type))
} else {
to_data_type(schema)?
};
Ok(Field::new(schema.name(), data_type, schema.nullable()))
}

fn to_data_type(schema: &Ffi_ArrowSchema) -> Result<DataType> {
Ok(match schema.format() {
"n" => DataType::Null,
"b" => DataType::Boolean,
"c" => DataType::Int8,
Expand Down Expand Up @@ -229,8 +248,7 @@ pub fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
)));
}
}
};
Ok(Field::new(schema.name(), data_type, schema.nullable()))
})
}

/// the inverse of [to_field]
Expand Down

0 comments on commit 522c7c7

Please sign in to comment.