-
-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
593 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
use arrow::array::ListArray; | ||
|
||
/// Exposes the size of the values that back an array.
pub trait ValueSize {
    /// Returns the underlying value size, e.g. of a Utf8 or a List array.
    ///
    /// This comes in handy during a rechunk.
    fn get_values_size(&self) -> usize;
}
|
||
impl ValueSize for ListArray<i64> { | ||
fn get_values_size(&self) -> usize { | ||
self.values().len() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/// Forked from Arrow until their API stabilizes. | ||
/// | ||
/// Note that the bound checks are optimized away. | ||
/// | ||
|
||
#[cfg(feature = "simd")] | ||
use packed_simd::u8x64; | ||
|
||
/// Single-bit masks: entry `n` has only bit `n` set.
const BIT_MASK: [u8; 8] = [
    0b0000_0001,
    0b0000_0010,
    0b0000_0100,
    0b0000_1000,
    0b0001_0000,
    0b0010_0000,
    0b0100_0000,
    0b1000_0000,
];
|
||
/// Rounds `num` up to the smallest multiple of 64 that is `>= num`.
#[inline]
pub fn round_upto_multiple_of_64(num: usize) -> usize {
    // 64 is a power of two, so adding 63 and clearing the low six bits rounds up.
    const LOW_BITS: usize = 64 - 1;
    (num + LOW_BITS) & !LOW_BITS
}
|
||
/// Rounds `num` up to the smallest multiple of `factor` that is `>= num`.
///
/// `factor` must be a power of 2; this is only verified in builds with debug assertions.
pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize {
    debug_assert!(factor > 0 && (factor & (factor - 1)) == 0);
    // For a power of two, `factor - 1` is exactly the set of low bits to round away.
    let low_bits = factor - 1;
    let bumped = num + low_bits;
    bumped & !low_bits
}
|
||
/// Returns whether bit at position `i` in `data` is set or not | ||
#[inline] | ||
pub fn get_bit(data: &[u8], i: usize) -> bool { | ||
(data[i >> 3] & BIT_MASK[i & 7]) != 0 | ||
} | ||
|
||
/// Returns whether bit at position `i` in `data` is set or not. | ||
/// | ||
/// # Safety | ||
/// | ||
/// Note this doesn't do any bound checking, for performance reason. The caller is | ||
/// responsible to guarantee that `i` is within bounds. | ||
#[inline] | ||
pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool { | ||
(*data.add(i >> 3) & BIT_MASK[i & 7]) != 0 | ||
} | ||
|
||
/// Sets bit at position `i` for `data` | ||
#[inline] | ||
pub fn set_bit(data: &mut [u8], i: usize) { | ||
data[i >> 3] |= BIT_MASK[i & 7]; | ||
} | ||
|
||
/// Sets bit at position `i` for `data` | ||
/// | ||
/// # Safety | ||
/// | ||
/// Note this doesn't do any bound checking, for performance reason. The caller is | ||
/// responsible to guarantee that `i` is within bounds. | ||
#[inline] | ||
pub unsafe fn set_bit_raw(data: *mut u8, i: usize) { | ||
*data.add(i >> 3) |= BIT_MASK[i & 7]; | ||
} | ||
|
||
/// Sets bit at position `i` for `data` to 0 | ||
#[inline] | ||
pub fn unset_bit(data: &mut [u8], i: usize) { | ||
data[i >> 3] ^= BIT_MASK[i & 7]; | ||
} | ||
|
||
/// Sets bit at position `i` for `data` to 0 | ||
/// | ||
/// # Safety | ||
/// | ||
/// Note this doesn't do any bound checking, for performance reason. The caller is | ||
/// responsible to guarantee that `i` is within bounds. | ||
#[inline] | ||
pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) { | ||
*data.add(i >> 3) ^= BIT_MASK[i & 7]; | ||
} | ||
|
||
/// Computes `value / divisor` rounded up to the next integer.
///
/// # Panics
///
/// Panics if `divisor` is zero (integer division by zero).
#[inline]
pub fn ceil(value: usize, divisor: usize) -> usize {
    let quotient = value / divisor;
    if value % divisor == 0 {
        quotient
    } else {
        quotient + 1
    }
}
|
||
/// Performs a SIMD bitwise binary operation on 64 bytes at a time.
///
/// Loads the first 64 bytes of `left` and of `right` into `u8x64` vectors, combines them
/// with `op`, and writes the 64-byte result to the start of `result`.
///
/// # Safety
///
/// Each of `left`, `right` and `result` must be at least 64 bytes long, and it is the
/// caller's responsibility to ensure that this is the case. If passed slices larger than
/// 64 bytes, the operation is only performed on the first 64 bytes. The `_unchecked`
/// load/store calls used here skip the length check, so a panic on shorter slices must
/// not be relied upon.
/// NOTE(review): packed_simd documents too-short slices as undefined behavior for these
/// calls -- confirm against the pinned packed_simd version.
// Gated on the same `simd` cargo feature as the `u8x64` import so that the function is
// compiled exactly when its vector type is in scope.
#[cfg(feature = "simd")]
pub unsafe fn bitwise_bin_op_simd<F>(left: &[u8], right: &[u8], result: &mut [u8], op: F)
where
    F: Fn(u8x64, u8x64) -> u8x64,
{
    let left_simd = u8x64::from_slice_unaligned_unchecked(left);
    let right_simd = u8x64::from_slice_unaligned_unchecked(right);
    let simd_result = op(left_simd, right_simd);
    simd_result.write_to_slice_unaligned_unchecked(result);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
/// Index-based validity queries that skip bounds checking.
pub trait IsValid {
    /// Reports whether the element at index `i` is valid.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; the caller must ensure `i` is in bounds.
    unsafe fn is_valid_unchecked(&self, i: usize) -> bool;

    /// Reports whether the element at index `i` is null.
    ///
    /// # Safety
    ///
    /// No bounds checks are performed; the caller must ensure `i` is in bounds.
    unsafe fn is_null_unchecked(&self, i: usize) -> bool;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
use std::borrow::Cow; | ||
use thiserror::Error as ThisError; | ||
|
||
/// Error message text: either a borrowed `&'static str` or an owned `String`, so that
/// static messages need no allocation.
type ErrString = Cow<'static, str>;
|
||
#[derive(Debug, ThisError)] | ||
pub enum PolarsError { | ||
#[error(transparent)] | ||
ArrowError(#[from] arrow::error::ArrowError), | ||
#[error("{0}")] | ||
ComputeError(ErrString), | ||
#[error("Out of bounds: {0}")] | ||
OutOfBounds(ErrString), | ||
} | ||
|
||
pub type Result<T> = std::result::Result<T, PolarsError>; |
Oops, something went wrong.