Skip to content

Commit

Permalink
arrow2: polars-arrow
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 13, 2021
1 parent c08e91c commit ad451f6
Show file tree
Hide file tree
Showing 11 changed files with 593 additions and 2 deletions.
3 changes: 1 addition & 2 deletions polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ description = "Arrow interfaces for Polars DataFrame library"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = {git = "https://github.com/apache/arrow-rs", rev = "0f55b828883b3b3afda43ae404b130d374e6f1a1", default-features = false}
#arrow = {version = "4.2", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "a0068a8ae892b281f7c66f0d1ee7396cb20f753a", default-features = false }
thiserror = "^1.0"
num = "^0.4"
13 changes: 13 additions & 0 deletions polars/polars-arrow/src/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use arrow::array::ListArray;

/// Exposes the size of the values stored underneath an array.
///
/// The `ListArray<i64>` implementation in this file returns the length of the
/// child values array.
pub trait ValueSize {
/// Useful for a Utf8 or a List to get underlying value size.
/// During a rechunk this is handy
///
/// NOTE(review): the unit (element count vs. bytes) is decided by each
/// implementor; the list implementation reports an element count.
fn get_values_size(&self) -> usize;
}

impl ValueSize for ListArray<i64> {
    /// Returns the length of the child values array backing this list array.
    fn get_values_size(&self) -> usize {
        let child_values = self.values();
        child_values.len()
    }
}
102 changes: 102 additions & 0 deletions polars/polars-arrow/src/bit_util.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/// Forked from Arrow until their API stabilizes.
///
/// Note that the bound checks are optimized away.
///

#[cfg(feature = "simd")]
use packed_simd::u8x64;

/// Single-bit masks indexed by bit position within a byte: `BIT_MASK[k] == 1 << k`.
const BIT_MASK: [u8; 8] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80];

/// Rounds `num` up to the closest multiple of 64 that is not smaller than `num`.
#[inline]
pub fn round_upto_multiple_of_64(num: usize) -> usize {
    const ALIGNMENT: usize = 64;
    round_upto_power_of_2(num, ALIGNMENT)
}

/// Rounds `num` up to the closest multiple of `factor` that is not smaller than `num`.
///
/// `factor` must be a power of two; this is only verified in debug builds.
pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize {
    debug_assert!(factor > 0 && (factor & (factor - 1)) == 0);
    // For a power of two, `factor - 1` has exactly the low bits set that must be
    // cleared after bumping `num` past the next boundary.
    let low_bits = factor - 1;
    let bumped = num + low_bits;
    bumped & !low_bits
}

/// Tests whether the bit at position `i` of `data` is set.
///
/// # Panics
///
/// Panics if byte `i >> 3` lies outside `data`.
#[inline]
pub fn get_bit(data: &[u8], i: usize) -> bool {
    let byte = data[i >> 3];
    let mask = BIT_MASK[i & 7];
    (byte & mask) != 0
}

/// Tests whether the bit at position `i` is set, reading through a raw pointer.
///
/// # Safety
///
/// No bounds checking is performed, for performance reasons. The caller must
/// guarantee that byte `i >> 3` past `data` is within bounds and readable.
#[inline]
pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool {
    let byte = *data.add(i >> 3);
    (byte & BIT_MASK[i & 7]) != 0
}

/// Turns on the bit at position `i` of `data`.
///
/// # Panics
///
/// Panics if byte `i >> 3` lies outside `data`.
#[inline]
pub fn set_bit(data: &mut [u8], i: usize) {
    let byte = &mut data[i >> 3];
    *byte |= BIT_MASK[i & 7];
}

/// Turns on the bit at position `i`, writing through a raw pointer.
///
/// # Safety
///
/// No bounds checking is performed, for performance reasons. The caller must
/// guarantee that byte `i >> 3` past `data` is within bounds and writable.
#[inline]
pub unsafe fn set_bit_raw(data: *mut u8, i: usize) {
    let byte = data.add(i >> 3);
    *byte |= BIT_MASK[i & 7];
}

/// Sets bit at position `i` for `data` to 0.
///
/// Clearing a bit that is already 0 leaves `data` unchanged.
///
/// # Panics
///
/// Panics if byte `i >> 3` lies outside `data`.
#[inline]
pub fn unset_bit(data: &mut [u8], i: usize) {
    // AND with the inverted single-bit mask. XOR would *toggle* the bit and
    // therefore set it whenever it was already 0.
    data[i >> 3] &= !(1u8 << (i & 7));
}

/// Sets bit at position `i` for `data` to 0, writing through a raw pointer.
///
/// Clearing a bit that is already 0 leaves the byte unchanged.
///
/// # Safety
///
/// Note this doesn't do any bound checking, for performance reason. The caller is
/// responsible to guarantee that `i` is within bounds.
#[inline]
pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) {
    // AND with the inverted single-bit mask. XOR would *toggle* the bit and
    // therefore set it whenever it was already 0.
    *data.add(i >> 3) &= !(1u8 << (i & 7));
}

/// Returns the ceil of `value`/`divisor`.
///
/// # Panics
///
/// Panics if `divisor` is 0, because the integer division itself panics.
#[inline]
pub fn ceil(value: usize, divisor: usize) -> usize {
    let quot = value / divisor;
    // A non-zero remainder means the exact quotient lies above `quot`.
    // No separate `divisor > 0` test is needed: the division above has already
    // panicked for a zero divisor.
    if value % divisor > 0 {
        quot + 1
    } else {
        quot
    }
}

/// Performs SIMD bitwise binary operations.
///
/// Loads 64 bytes from each of `left` and `right`, applies `op` to the two
/// vectors and stores the 64-byte result into `result`.
///
/// # Safety
///
/// Each slice must hold at least 64 bytes and it is the caller's responsibility to
/// ensure that this is the case. If passed slices larger than 64 bytes the operation
/// will only be performed on the first 64 bytes.
///
/// NOTE(review): the `_unchecked` load/store calls used here are not shown to
/// validate the slice length, so shorter slices should be treated as undefined
/// behaviour rather than a guaranteed panic — confirm against the `packed_simd` docs.
// Gated on the same `simd` cargo feature as the `u8x64` import so the function and
// the type it needs are always enabled together.
#[cfg(feature = "simd")]
pub unsafe fn bitwise_bin_op_simd<F>(left: &[u8], right: &[u8], result: &mut [u8], op: F)
where
    F: Fn(u8x64, u8x64) -> u8x64,
{
    let left_simd = u8x64::from_slice_unaligned_unchecked(left);
    let right_simd = u8x64::from_slice_unaligned_unchecked(right);
    let simd_result = op(left_simd, right_simd);
    simd_result.write_to_slice_unaligned_unchecked(result);
}
9 changes: 9 additions & 0 deletions polars/polars-arrow/src/buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/// Index-based validity lookups that skip bounds checking.
///
/// NOTE(review): no implementor is visible here; presumably "valid" means the
/// slot at `i` is non-null and "null" is its negation — confirm against the
/// implementations.
pub trait IsValid {
/// Reports whether position `i` is valid.
///
/// # Safety
/// no bound checks
unsafe fn is_valid_unchecked(&self, i: usize) -> bool;

/// Reports whether position `i` is null.
///
/// # Safety
/// no bound checks
unsafe fn is_null_unchecked(&self, i: usize) -> bool;
}
16 changes: 16 additions & 0 deletions polars/polars-arrow/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use std::borrow::Cow;
use thiserror::Error as ThisError;

/// Error message type: either a borrowed `'static` string or an owned `String`.
type ErrString = Cow<'static, str>;

/// Error type for this crate.
#[derive(Debug, ThisError)]
pub enum PolarsError {
/// An error coming from the arrow crate. `#[from]` provides the conversion from
/// `arrow::error::ArrowError`; `transparent` forwards its message unchanged.
#[error(transparent)]
ArrowError(#[from] arrow::error::ArrowError),
/// A computation failed; the message is displayed as-is.
#[error("{0}")]
ComputeError(ErrString),
/// An out-of-bounds condition; the message is displayed with an
/// "Out of bounds: " prefix.
#[error("Out of bounds: {0}")]
OutOfBounds(ErrString),
}

/// Shorthand for a `std::result::Result` whose error type is [`PolarsError`].
pub type Result<T> = std::result::Result<T, PolarsError>;

0 comments on commit ad451f6

Please sign in to comment.