Skip to content

Commit

Permalink
greatly speed up PrimitiveChunked creation when collecting T::Native;
Browse files Browse the repository at this point in the history
speedup 10x
  • Loading branch information
ritchie46 committed Oct 11, 2020
1 parent b829df2 commit 8500844
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ Additional cargo features:
- pretty printing of DataFrames
* `temporal (default)`
- Conversions between Chrono and Polars for temporal data
* `simd (default)`
* `simd`
- SIMD operations
* `parquet`
- Read Apache Parquet format
Expand Down
2 changes: 1 addition & 1 deletion polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ docs = []
temporal = ["chrono"]
random = ["rand", "rand_distr"]
parallel = []
default = ["pretty", "docs", "temporal", "simd"]
default = ["pretty", "docs", "temporal"]
lazy = []

[dependencies]
Expand Down
17 changes: 9 additions & 8 deletions polars/src/chunked_array/builder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::prelude::*;
use crate::utils::get_iter_capacity;
use crate::utils::{get_iter_capacity, Xob};
use arrow::array::{ArrayBuilder, ArrayDataBuilder, ArrayRef};
use arrow::datatypes::{ArrowPrimitiveType, Field, ToByteSlice};
pub use arrow::memory;
Expand Down Expand Up @@ -370,7 +370,9 @@ impl<T> AlignedVec<T> {
/// inner Vec is reached will reallocate the Vec without the alignment, leaving this destructor's
/// alignment incorrect
pub unsafe fn push(&mut self, value: T) {
debug_assert!(self.inner.len() < self.capacity);
if self.inner.len() == self.capacity {
self.reserve(1);
}
self.inner.push(value)
}

Expand Down Expand Up @@ -408,9 +410,7 @@ where
T: ArrowPrimitiveType,
{
fn new_from_slice(name: &str, v: &[T::Native]) -> Self {
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, v.len());
v.iter().for_each(|&v| builder.append_value(v));
builder.finish()
Self::new_from_iter(name, v.into_iter().copied())
}

fn new_from_opt_slice(name: &str, opt_v: &[Option<T::Native>]) -> Self {
Expand All @@ -430,9 +430,10 @@ where

/// Create a new ChunkedArray from an iterator.
fn new_from_iter(name: &str, it: impl Iterator<Item = T::Native>) -> ChunkedArray<T> {
let mut builder = PrimitiveChunkedBuilder::new(name, get_iter_capacity(&it));
it.for_each(|opt| builder.append_value(opt));
builder.finish()
let ca: Xob<ChunkedArray<_>> = it.collect();
let mut ca = ca.into_inner();
ca.rename(name);
ca
}
}

Expand Down
2 changes: 1 addition & 1 deletion polars/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ impl<T> ChunkedArray<T>
where
T: ArrowPrimitiveType,
{
/// Create a new ChunkedArray by taking ownershipt of the AlignedVec. This operation is zero copy.
/// Create a new ChunkedArray by taking ownership of the AlignedVec. This operation is zero copy.
pub fn new_from_aligned_vec(name: &str, v: AlignedVec<T::Native>) -> Self {
let arr = aligned_vec_to_primitive_array::<T>(v, None, Some(0));
Self::new_from_chunks(name, vec![Arc::new(arr)])
Expand Down
24 changes: 19 additions & 5 deletions polars/src/chunked_array/upstream_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,28 @@ impl<T> FromIterator<T::Native> for Xob<ChunkedArray<T>>
where
T: ArrowPrimitiveType,
{
// We use AlignedVec because it is way faster than Arrow's builder. We can do this because we
// know we don't have null values.
fn from_iter<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
let iter = iter.into_iter();
let mut builder = PrimitiveChunkedBuilder::new("", get_iter_capacity(&iter));
// bools are bit packed
if let ArrowDataType::Boolean = T::get_data_type() {
let iter = iter.into_iter();
let mut builder = PrimitiveChunkedBuilder::new("", get_iter_capacity(&iter));

for val in iter {
builder.append_value(val);
for val in iter {
builder.append_value(val);
}
Xob::new(builder.finish())
} else {
let iter = iter.into_iter();
let mut v = AlignedVec::with_capacity_aligned(get_iter_capacity(&iter));

for val in iter {
unsafe { v.push(val) }
}
// TODO: shrink capacity
Xob::new(ChunkedArray::new_from_aligned_vec("", v))
}
Xob::new(builder.finish())
}
}

Expand Down
2 changes: 1 addition & 1 deletion polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@
//! - pretty printing of DataFrames
//! * `temporal (default)`
//! - Conversions between Chrono and Polars for temporal data
//! * `simd (default)`
//! * `simd`
//! - SIMD operations
//! * `parquet`
//! - Read Apache Parquet format
Expand Down

0 comments on commit 8500844

Please sign in to comment.