Skip to content

Commit

Permalink
vendor arrow simd operations so that we can use latest nightly
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 11, 2020
1 parent f392939 commit 3fbbc3f
Show file tree
Hide file tree
Showing 6 changed files with 290 additions and 46 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ Additional cargo features:
- pretty printing of DataFrames
* `temporal (default)`
- Conversions between Chrono and Polars for temporal data
* `simd`
* `simd (default)`
- SIMD operations
* `parquet`
- Read Apache Parquet format
Expand Down
7 changes: 4 additions & 3 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ readme = "../README.md"

[features]
pretty = ["prettytable-rs"]
simd = ["arrow/simd"]
simd = [] #["arrow/simd"]
docs = []
temporal = ["chrono"]
random = ["rand", "rand_distr"]
parallel = []
default = ["pretty", "docs", "temporal"]
default = ["pretty", "docs", "temporal", "simd"]
lazy = []

[dependencies]
Expand All @@ -33,4 +33,5 @@ chrono = {version = "^0.4.13", optional = true}
parquet = {version = "1", optional = true}
rand = {version = "0.7", optional = true}
rand_distr = {version = "0.3", optional = true}
ndarray = {version = "0.13", optional = true, default_features = false}
ndarray = {version = "0.13", optional = true, default_features = false}
packed_simd_2 = "0.3.4"
18 changes: 9 additions & 9 deletions polars/src/chunked_array/kernels/vendor/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ use num::{One, Zero};
use super::utils::apply_bin_op_to_option_bitmap;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
use super::utils::simd_load_set_invalid;
use crate::datatypes::PolarsNumericType;
use arrow::array::*;
#[cfg(feature = "simd")]
use arrow::bitmap::Bitmap;
use arrow::buffer::Buffer;
#[cfg(feature = "simd")]
use arrow::buffer::MutableBuffer;
use arrow::datatypes;
use arrow::datatypes::ToByteSlice;
use arrow::error::{ArrowError, Result};
use arrow::util::bit_util;
Expand All @@ -54,7 +54,7 @@ pub fn math_op<T, F>(
op: F,
) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T: PolarsNumericType,
F: Fn(T::Native, T::Native) -> Result<T::Native>,
{
if left.len() != right.len() {
Expand Down Expand Up @@ -106,7 +106,7 @@ fn simd_math_op<T, F>(
op: F,
) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T: PolarsNumericType,
T::Simd: Add<Output = T::Simd>
+ Sub<Output = T::Simd>
+ Mul<Output = T::Simd>
Expand Down Expand Up @@ -161,7 +161,7 @@ where
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn simd_divide<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T: PolarsNumericType,
T::Native: One + Zero,
{
if left.len() != right.len() {
Expand Down Expand Up @@ -203,7 +203,7 @@ where
T::write(simd_result, result_slice);
}

let null_bit_buffer = bitmap.map(|b| b.bits);
let null_bit_buffer = bitmap.map(|b| b.into_buffer());

let data = ArrayData::new(
T::get_data_type(),
Expand All @@ -221,7 +221,7 @@ where
/// then the result is also null.
pub fn add<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T: PolarsNumericType,
T::Native: Add<Output = T::Native>
+ Sub<Output = T::Native>
+ Mul<Output = T::Native>
Expand All @@ -239,7 +239,7 @@ where
/// then the result is also null.
pub fn subtract<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T: PolarsNumericType,
T::Native: Add<Output = T::Native>
+ Sub<Output = T::Native>
+ Mul<Output = T::Native>
Expand All @@ -257,7 +257,7 @@ where
/// then the result is also null.
pub fn multiply<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T: PolarsNumericType,
T::Native: Add<Output = T::Native>
+ Sub<Output = T::Native>
+ Mul<Output = T::Native>
Expand All @@ -276,7 +276,7 @@ where
/// operation will be `Err(ArrowError::DivideByZero)`.
pub fn divide<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T: PolarsNumericType,
T::Native: Add<Output = T::Native>
+ Sub<Output = T::Native>
+ Mul<Output = T::Native>
Expand Down
49 changes: 43 additions & 6 deletions polars/src/chunked_array/kernels/vendor/utils.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
use crate::datatypes::PolarsNumericType;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
use arrow::array::PrimitiveArray;
use arrow::bitmap::Bitmap;
use arrow::buffer::Buffer;
use arrow::error::Result;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
use arrow::{
array::{ArrayDataRef, PrimitiveArray},
datatypes::ArrowNumericType,
};
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
use num::One;
#[cfg(feature = "simd")]
use std::cmp::min;

/// Performs a SIMD load but sets all 'invalid' lanes to a constant value.
///
Expand All @@ -26,7 +27,7 @@ pub(crate) unsafe fn simd_load_set_invalid<T>(
fill_value: T::Native,
) -> T::Simd
where
T: ArrowNumericType,
T: PolarsNumericType,
T::Native: One,
{
let simd_with_zeros = T::load(array.value_slice(i, simd_width));
Expand Down Expand Up @@ -59,3 +60,39 @@ where
},
}
}

/// Builds the SIMD validity mask (e.g. `packed_simd::m32x16` or a similar mask type) for the
/// `simd_width` array slots starting at index `i`.
///
/// Every lane starts out 'valid' (`true`) and is then cleared to 'invalid' (`false`) when either:
///
/// * a `Bitmap` is present and its bit for the corresponding array slot is not set, i.e. the
///   slot is `NULL`; or
/// * the corresponding slot lies at or beyond `array_len`, i.e. the lane is padding.
///
/// Lane `k` of the returned mask describes array slot `i + k`.
///
/// # Safety
///
/// NOTE(review): the exact contract behind the `unsafe` marker is not visible here. Lane
/// indices are computed as `slot - i`, so callers are presumably expected to pass
/// `i <= array_len` and a `simd_width` matching the lane count of `T::SimdMask` — confirm
/// against the call sites.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
unsafe fn is_valid<T>(
    bitmap: &Option<Bitmap>,
    i: usize,
    simd_width: usize,
    array_len: usize,
) -> T::SimdMask
where
    T: PolarsNumericType,
{
    // One past the last array slot covered by this SIMD chunk.
    let chunk_end = i + simd_width;
    let mut mask = T::mask_init(true);

    // Clear the lanes whose slots are marked as null in the bitmap; only slots that
    // actually exist in the array are looked up.
    if let Some(nulls) = bitmap.as_ref() {
        let last_real_slot = min(array_len, chunk_end);
        for slot in (i..last_real_slot).filter(|&slot| !nulls.is_set(slot)) {
            mask = T::mask_set(mask, slot - i, false);
        }
    }

    // Clear the padding lanes that fall past the end of the array.
    for lane in (array_len..chunk_end).map(|slot| slot - i) {
        mask = T::mask_set(mask, lane, false);
    }

    mask
}

0 comments on commit 3fbbc3f

Please sign in to comment.