Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved performance in cast Primitive to Binary/String again (4x) #651

Merged
merged 7 commits into from
Dec 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl<O: Offset> BinaryArray<O> {
/// * The `data_type`'s physical type is not consistent with the offset `O`.
/// * The last element of `offsets` is different from `values.len()`.
/// * The validity is not `None` and its length is different from `offsets.len() - 1`.
pub fn from_data_unchecked(
pub unsafe fn from_data_unchecked(
data_type: DataType,
offsets: Buffer<O>,
values: Buffer<u8>,
Expand Down
90 changes: 57 additions & 33 deletions src/compute/cast/primitive_to.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::hash::Hash;

use crate::buffer::MutableBuffer;
use crate::error::Result;
use crate::{
array::*,
Expand All @@ -8,7 +9,6 @@ use crate::{
datatypes::{DataType, TimeUnit},
temporal_conversions::*,
types::NativeType,
util::lexical_to_bytes_mut,
};

use super::CastOptions;
Expand All @@ -17,21 +17,34 @@ use super::CastOptions;
/// Formats every value of `from` with `lexical_core` and collects the produced
/// bytes into a [`BinaryArray`] that carries a clone of `from`'s validity.
pub fn primitive_to_binary<T: NativeType + lexical_core::ToLexical, O: Offset>(
from: &PrimitiveArray<T>,
) -> BinaryArray<O> {
// NOTE(review): this function is shown as a rendered diff without +/- markers.
// The statements from here down to `builder.into()` look like the removed
// implementation (they call `lexical_to_bytes_mut`, whose import the same
// change drops) — confirm against the real diff before treating them as live code.
// They format each non-null value into a reused scratch buffer and push it,
// or a null, into a `MutableBinaryArray`.
let mut buffer = vec![];
let builder = from.iter().fold(
MutableBinaryArray::<O>::with_capacity(from.len()),
|mut builder, x| {
match x {
Some(x) => {
lexical_to_bytes_mut(*x, &mut buffer);
builder.push(Some(buffer.as_slice()));
}
None => builder.push_null(),
}
builder
},
);
builder.into()
// Presumably the added implementation starts here: it writes the formatted
// bytes straight into the values buffer and builds the offsets by hand.
let mut values: MutableBuffer<u8> = MutableBuffer::with_capacity(from.len());
let mut offsets: MutableBuffer<O> = MutableBuffer::with_capacity(from.len() + 1);
// First offset is `O::default()` (presumably zero — confirm).
offsets.push(O::default());

// Number of bytes written into `values` so far.
let mut offset: usize = 0;

unsafe {
// Iterates the raw values rather than `from.iter()`, so null slots are
// formatted as well; nullness is carried only by the validity passed below.
for x in from.values().iter() {
// The length of `values` is only set after the loop, so the requested amount
// covers everything written so far plus room for one more formatted value.
// NOTE(review): this relies on `reserve` keeping bytes that were written past
// the buffer's current length when it reallocates — confirm for `MutableBuffer`.
values.reserve(offset + T::FORMATTED_SIZE_DECIMAL);

// Mutable view over the unused tail of the allocation, starting at `offset`.
let bytes = std::slice::from_raw_parts_mut(
values.as_mut_ptr().add(offset),
values.capacity() - offset,
);
// The length of what `write_unchecked` returns is taken as the bytes produced.
let len = lexical_core::write_unchecked(*x, bytes).len();

offset += len;
// Panics (via `unwrap`) if `from_isize` yields `None` for this offset.
offsets.push(O::from_isize(offset as isize).unwrap());
}
// Publish the bytes written through the raw pointer, then release spare capacity.
values.set_len(offset);
values.shrink_to_fit();
// Offsets and values were built together above; the source validity is cloned as-is.
BinaryArray::<O>::from_data_unchecked(
BinaryArray::<O>::default_data_type(),
offsets.into(),
values.into(),
from.validity().cloned(),
)
}
}

pub(super) fn primitive_to_binary_dyn<T, O>(from: &dyn Array) -> Result<Box<dyn Array>>
Expand Down Expand Up @@ -70,23 +83,34 @@ where
/// Formats every value of `from` with `lexical_core` and collects the produced
/// bytes into a [`Utf8Array`] that carries a clone of `from`'s validity.
pub fn primitive_to_utf8<T: NativeType + lexical_core::ToLexical, O: Offset>(
from: &PrimitiveArray<T>,
) -> Utf8Array<O> {
// NOTE(review): this function is shown as a rendered diff without +/- markers.
// The statements from here down to `builder.into()` look like the removed
// implementation (they call `lexical_to_bytes_mut`, whose import the same
// change drops) — confirm against the real diff before treating them as live code.
// They format each non-null value into a reused scratch buffer and push it,
// or a null, into a `MutableUtf8Array`.
let mut buffer = vec![];
let builder = from.iter().fold(
MutableUtf8Array::<O>::with_capacity(from.len()),
|mut builder, x| {
match x {
Some(x) => {
lexical_to_bytes_mut(*x, &mut buffer);
// Skips UTF-8 validation of the formatted bytes; presumably sound because
// the formatter only emits ASCII — confirm.
builder.push(Some(unsafe {
std::str::from_utf8_unchecked(buffer.as_slice())
}));
}
None => builder.push_null(),
}
builder
},
);
builder.into()
// Presumably the added implementation starts here: it writes the formatted
// bytes straight into the values buffer and builds the offsets by hand.
let mut values: MutableBuffer<u8> = MutableBuffer::with_capacity(from.len());
let mut offsets: MutableBuffer<O> = MutableBuffer::with_capacity(from.len() + 1);
// First offset is `O::default()` (presumably zero — confirm).
offsets.push(O::default());

// Number of bytes written into `values` so far.
let mut offset: usize = 0;

unsafe {
// Iterates the raw values rather than `from.iter()`, so null slots are
// formatted as well; nullness is carried only by the validity passed below.
for x in from.values().iter() {
// The length of `values` is only set after the loop, so the requested amount
// covers everything written so far plus room for one more formatted value.
// NOTE(review): this relies on `reserve` keeping bytes that were written past
// the buffer's current length when it reallocates — confirm for `MutableBuffer`.
values.reserve(offset + T::FORMATTED_SIZE_DECIMAL);

// Mutable view over the unused tail of the allocation, starting at `offset`.
let bytes = std::slice::from_raw_parts_mut(
values.as_mut_ptr().add(offset),
values.capacity() - offset,
);
// The length of what `write_unchecked` returns is taken as the bytes produced.
let len = lexical_core::write_unchecked(*x, bytes).len();

offset += len;
// Panics (via `unwrap`) if `from_isize` yields `None` for this offset.
offsets.push(O::from_isize(offset as isize).unwrap());
}
// Publish the bytes written through the raw pointer, then release spare capacity.
values.set_len(offset);
values.shrink_to_fit();
// NOTE(review): nothing here validates the bytes as UTF-8; this presumably leans
// on `lexical_core` emitting only valid UTF-8 — confirm.
Utf8Array::<O>::from_data_unchecked(
Utf8Array::<O>::default_data_type(),
offsets.into(),
values.into(),
from.validity().cloned(),
)
}
}

pub(super) fn primitive_to_utf8_dyn<T, O>(from: &dyn Array) -> Result<Box<dyn Array>>
Expand Down