Skip to content

Commit

Permalink
polars-utils crate
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 23, 2021
1 parent 3c85adf commit 1c5cf4e
Show file tree
Hide file tree
Showing 12 changed files with 159 additions and 119 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ members = [
"polars/polars-io",
"polars/polars-lazy",
"polars/polars-time",
"polars/polars-utils",
]

# [patch.crates-io]
Expand Down
115 changes: 0 additions & 115 deletions polars/polars-core/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,6 @@ impl<T> Deref for Wrap<T> {
}
}

/// Computes the position of `item` inside `slice` from the distance between
/// the two addresses, divided by `size_of::<T>()`.
///
/// # Safety
///
/// `item` must be a reference to an element stored in `slice` and `T` must not
/// be zero-sized. Otherwise the address subtraction can underflow, the division
/// can be by zero, or the returned index is meaningless.
unsafe fn index_of_unchecked<T>(slice: &[T], item: &T) -> usize {
    (item as *const T as usize - slice.as_ptr() as usize) / std::mem::size_of::<T>()
}

/// Returns the index of `item` if it is a reference *into* `slice`
/// (identity by address, not by value equality), otherwise `None`.
///
/// The range check is inclusive at the start of the slice, so a reference to
/// the first element yields `Some(0)`.
fn index_of<T>(slice: &[T], item: &T) -> Option<usize> {
    debug_assert!(std::mem::size_of::<T>() > 0);
    let ptr = item as *const T;
    // `as_ptr_range` is `start..end` (start inclusive, end exclusive), which is
    // exactly the set of addresses at which an element of `slice` can live.
    if slice.as_ptr_range().contains(&ptr) {
        // SAFETY: `ptr` was just checked to lie within the bounds of `slice`.
        Some(unsafe { index_of_unchecked(slice, item) })
    } else {
        None
    }
}

pub(crate) fn set_partition_size() -> usize {
let mut n_partitions = POOL.current_num_threads();
// set n_partitions to the closest 2^n above the number of threads.
Expand Down Expand Up @@ -163,105 +147,6 @@ pub fn slice_offsets(offset: i64, length: usize, array_len: usize) -> (usize, us
}
}

/// Handle to an item stored in an [`Arena`]; wraps the item's index.
///
/// The default value is `Node(usize::MAX)` — presumably a "not set"
/// placeholder; confirm against callers.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Node(pub usize);

impl Default for Node {
    /// Yields `Node(usize::MAX)`.
    fn default() -> Self {
        Self(usize::MAX)
    }
}

#[derive(Clone)]
#[cfg(feature = "private")]
pub struct Arena<T> {
items: Vec<T>,
}

impl<T> Default for Arena<T> {
fn default() -> Self {
Self::new()
}
}

/// Simple arena implementation.
///
/// Items are pushed onto a `Vec` and addressed through the [`Node`] returned by
/// [`Arena::add`]. The backing storage is freed when the arena is dropped.
impl<T> Arena<T> {
    /// Creates an empty arena.
    pub fn new() -> Self {
        Arena { items: Vec::new() }
    }

    /// Creates an empty arena with room for at least `cap` items.
    pub fn with_capacity(cap: usize) -> Self {
        Arena {
            items: Vec::with_capacity(cap),
        }
    }

    /// Appends `val` and returns the [`Node`] that addresses it.
    pub fn add(&mut self, val: T) -> Node {
        let idx = self.items.len();
        self.items.push(val);
        Node(idx)
    }

    /// Removes and returns the most recently added item, or `None` if the arena is empty.
    pub fn pop(&mut self) -> Option<T> {
        self.items.pop()
    }

    /// Number of items currently stored.
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Returns `true` when no items are stored.
    pub fn is_empty(&self) -> bool {
        self.items.is_empty()
    }

    /// Returns the [`Node`] of `val` when `val` is a reference to an item stored
    /// in this arena. The lookup is done by address (via `index_of`), not by
    /// value equality.
    pub fn get_node(&self, val: &T) -> Option<Node> {
        index_of(&self.items, val).map(Node)
    }

    /// Returns a shared reference to the item at `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for this arena (for example
    /// `Node::default()`). Bounds are always checked so that this safe function
    /// can never cause undefined behaviour.
    #[inline]
    pub fn get(&self, idx: Node) -> &T {
        &self.items[idx.0]
    }

    /// Returns a mutable reference to the item at `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for this arena.
    #[inline]
    pub fn get_mut(&mut self, idx: Node) -> &mut T {
        &mut self.items[idx.0]
    }

    /// Overwrites the item at `idx` with `val`, dropping the previous item.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for this arena.
    #[inline]
    pub fn replace(&mut self, idx: Node, val: T) {
        *self.get_mut(idx) = val;
    }
}

impl<T: Default> Arena<T> {
    /// Moves the item at `idx` out of the arena, leaving `T::default()` in its slot.
    #[inline]
    pub fn take(&mut self, idx: Node) -> T {
        std::mem::take(self.get_mut(idx))
    }

    /// Replaces the item at `idx` with `f(item)`.
    ///
    /// While `f` runs the slot holds `T::default()`.
    pub fn replace_with<F>(&mut self, idx: Node, f: F)
    where
        F: FnOnce(T) -> T,
    {
        let val = self.take(idx);
        self.replace(idx, f(val));
    }

    /// Fallible variant of [`Arena::replace_with`].
    ///
    /// `f` is invoked exactly once, so any `FnOnce` is accepted (every `FnMut`
    /// closure still qualifies).
    ///
    /// # Errors
    ///
    /// Returns the error produced by `f`. In that case the original item has
    /// been consumed by `f` and the slot keeps `T::default()`.
    pub fn try_replace_with<F>(&mut self, idx: Node, f: F) -> Result<()>
    where
        F: FnOnce(T) -> Result<T>,
    {
        let val = self.take(idx);
        self.replace(idx, f(val)?);
        Ok(())
    }
}

/// Apply a macro on the Series
#[macro_export]
macro_rules! match_dtype_to_physical_apply_macro {
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ regex = { version = "1.4", optional = true }
polars-arrow = { version = "0.18.0", path = "../polars-arrow" }
polars-core = { version = "0.18.0", path = "../polars-core", features = ["lazy", "private", "zip_with"], default-features = false }
polars-io = { version = "0.18.0", path = "../polars-io", features = ["lazy", "csv-file", "private"], default-features = false }
polars-utils = { version = "0.1.0", path = "../polars-utils" }
# uncomment to have datafusion integration
# when uncommenting we both need to point to the same arrow version
# datafusion = {version="4.0.0-SNAPSHOT", git = "https://github.com/apache/arrow-datafusion", rev = "88222b7", default-features=false, optional=true}
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-lazy/src/logical_plan/aexpr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use crate::prelude::*;
use crate::utils::rename_field;
use polars_core::frame::groupby::{fmt_groupby_column, GroupByMethod};
use polars_core::prelude::*;
use polars_core::utils::{get_supertype, Arena, Node};
use polars_core::utils::get_supertype;
use polars_utils::arena::{Arena, Node};
use std::sync::Arc;

#[derive(Clone, Debug)]
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/logical_plan/alp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::utils::{aexprs_to_schema, PushNode};
use ahash::RandomState;
use polars_core::frame::groupby::DynamicGroupOptions;
use polars_core::prelude::*;
use polars_core::utils::{Arena, Node};
use polars_utils::arena::{Arena, Node};
use std::collections::HashSet;
#[cfg(any(feature = "csv-file", feature = "parquet"))]
use std::path::PathBuf;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use polars_core::utils::Arena;
use polars_utils::arena::Arena;

use crate::logical_plan::optimizer::stack_opt::OptimizationRule;
use crate::logical_plan::*;
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/prelude.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pub use polars_core::utils::{Arena, Node};
pub(crate) use polars_utils::arena::{Arena, Node};

pub use crate::{
dsl::*,
Expand Down
8 changes: 8 additions & 0 deletions polars/polars-utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name = "polars-utils"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
115 changes: 115 additions & 0 deletions polars/polars-utils/src/arena.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
use crate::error::*;

/// Computes the position of `item` inside `slice` from the distance between
/// the two addresses, divided by `size_of::<T>()`.
///
/// # Safety
///
/// `item` must be a reference to an element stored in `slice` and `T` must not
/// be zero-sized. Otherwise the address subtraction can underflow, the division
/// can be by zero, or the returned index is meaningless.
unsafe fn index_of_unchecked<T>(slice: &[T], item: &T) -> usize {
    (item as *const T as usize - slice.as_ptr() as usize) / std::mem::size_of::<T>()
}

/// Returns the index of `item` if it is a reference *into* `slice`
/// (identity by address, not by value equality), otherwise `None`.
///
/// The range check is inclusive at the start of the slice, so a reference to
/// the first element yields `Some(0)`.
fn index_of<T>(slice: &[T], item: &T) -> Option<usize> {
    debug_assert!(std::mem::size_of::<T>() > 0);
    let ptr = item as *const T;
    // `as_ptr_range` is `start..end` (start inclusive, end exclusive), which is
    // exactly the set of addresses at which an element of `slice` can live.
    if slice.as_ptr_range().contains(&ptr) {
        // SAFETY: `ptr` was just checked to lie within the bounds of `slice`.
        Some(unsafe { index_of_unchecked(slice, item) })
    } else {
        None
    }
}

/// Handle to an item stored in an [`Arena`]; wraps the item's index.
///
/// The default value is `Node(usize::MAX)` — presumably a "not set"
/// placeholder; confirm against callers.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Node(pub usize);

impl Default for Node {
    /// Yields `Node(usize::MAX)`.
    fn default() -> Self {
        Self(usize::MAX)
    }
}

/// Container that keeps all of its items in a single `Vec`.
#[derive(Clone)]
pub struct Arena<T> {
    // Backing storage; an item's position in this vector is its `Node` index.
    items: Vec<T>,
}

impl<T> Default for Arena<T> {
    /// Creates an arena that holds no items.
    fn default() -> Self {
        Arena { items: Vec::new() }
    }
}

/// Simple arena implementation.
///
/// Items are pushed onto a `Vec` and addressed through the [`Node`] returned by
/// [`Arena::add`]. The backing storage is freed when the arena is dropped.
impl<T> Arena<T> {
    /// Creates an empty arena.
    pub fn new() -> Self {
        Arena { items: Vec::new() }
    }

    /// Creates an empty arena with room for at least `cap` items.
    pub fn with_capacity(cap: usize) -> Self {
        Arena {
            items: Vec::with_capacity(cap),
        }
    }

    /// Appends `val` and returns the [`Node`] that addresses it.
    pub fn add(&mut self, val: T) -> Node {
        let idx = self.items.len();
        self.items.push(val);
        Node(idx)
    }

    /// Removes and returns the most recently added item, or `None` if the arena is empty.
    pub fn pop(&mut self) -> Option<T> {
        self.items.pop()
    }

    /// Number of items currently stored.
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Returns `true` when no items are stored.
    pub fn is_empty(&self) -> bool {
        self.items.is_empty()
    }

    /// Returns the [`Node`] of `val` when `val` is a reference to an item stored
    /// in this arena. The lookup is done by address (via `index_of`), not by
    /// value equality.
    pub fn get_node(&self, val: &T) -> Option<Node> {
        index_of(&self.items, val).map(Node)
    }

    /// Returns a shared reference to the item at `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for this arena (for example
    /// `Node::default()`). Bounds are always checked so that this safe function
    /// can never cause undefined behaviour.
    #[inline]
    pub fn get(&self, idx: Node) -> &T {
        &self.items[idx.0]
    }

    /// Returns a mutable reference to the item at `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for this arena.
    #[inline]
    pub fn get_mut(&mut self, idx: Node) -> &mut T {
        &mut self.items[idx.0]
    }

    /// Overwrites the item at `idx` with `val`, dropping the previous item.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds for this arena.
    #[inline]
    pub fn replace(&mut self, idx: Node, val: T) {
        *self.get_mut(idx) = val;
    }
}

impl<T: Default> Arena<T> {
    /// Moves the item at `idx` out of the arena, leaving `T::default()` in its slot.
    #[inline]
    pub fn take(&mut self, idx: Node) -> T {
        std::mem::take(self.get_mut(idx))
    }

    /// Replaces the item at `idx` with `f(item)`.
    ///
    /// While `f` runs the slot holds `T::default()`.
    pub fn replace_with<F>(&mut self, idx: Node, f: F)
    where
        F: FnOnce(T) -> T,
    {
        let val = self.take(idx);
        self.replace(idx, f(val));
    }

    /// Fallible variant of [`Arena::replace_with`].
    ///
    /// `f` is invoked exactly once, so any `FnOnce` is accepted (every `FnMut`
    /// closure still qualifies).
    ///
    /// # Errors
    ///
    /// Returns the error produced by `f`. In that case the original item has
    /// been consumed by `f` and the slot keeps `T::default()`.
    pub fn try_replace_with<F>(&mut self, idx: Node, f: F) -> Result<()>
    where
        F: FnOnce(T) -> Result<T>,
    {
        let val = self.take(idx);
        self.replace(idx, f(val)?);
        Ok(())
    }
}
22 changes: 22 additions & 0 deletions polars/polars-utils/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use std::borrow::Cow;
use std::fmt::{Display, Formatter};

/// Message payload of an error: a borrowed `'static` string or an owned `String`.
type ErrString = Cow<'static, str>;

/// Error type of this crate.
#[derive(Debug)]
pub enum PolarsUtilsError {
    /// Carries a message describing the failure.
    ComputeError(ErrString),
}

impl Display for PolarsUtilsError {
    /// Writes the message carried by the error, without any prefix.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            PolarsUtilsError::ComputeError(msg) => f.write_str(msg),
        }
    }
}

// Lets the error be used with `Box<dyn Error>`, `?` conversions and other
// error-handling code that expects the standard trait.
impl std::error::Error for PolarsUtilsError {}

/// `Result` alias whose error type is [`PolarsUtilsError`].
pub type Result<T> = std::result::Result<T, PolarsUtilsError>;
2 changes: 2 additions & 0 deletions polars/polars-utils/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod arena;
mod error;
5 changes: 5 additions & 0 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 1c5cf4e

Please sign in to comment.