Skip to content

Commit

Permalink
add hash to rust expressions (#3350)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed May 10, 2022
1 parent 2f3bb06 commit 6ef6271
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 20 deletions.
2 changes: 1 addition & 1 deletion polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ asof_join = ["polars-core/asof_join", "polars-lazy/asof_join"]
cross_join = ["polars-core/cross_join", "polars-lazy/cross_join"]
dot_product = ["polars-core/dot_product", "polars-lazy/dot_product"]
concat_str = ["polars-core/concat_str", "polars-lazy/concat_str"]
row_hash = ["polars-core/row_hash"]
row_hash = ["polars-core/row_hash", "polars-lazy/row_hash"]
reinterpret = ["polars-core/reinterpret"]
decompress = ["polars-io/decompress"]
decompress-fast = ["polars-io/decompress-fast"]
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ list_eval = []
chunked_ids = []
list_to_struct = ["polars-ops/list_to_struct"]
python = ["pyo3"]
row_hash = ["polars-core/row_hash"]

# no guarantees whatsoever
private = ["polars-time/private"]
Expand Down
12 changes: 12 additions & 0 deletions polars/polars-lazy/src/dsl/function_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use serde::{Deserialize, Serialize};
/// Function variants that can be attached to an expression.
///
/// Each variant is turned into a `SeriesUdf` closure by the
/// `From<FunctionExpr> for NoEq<Arc<dyn SeriesUdf>>` impl in this file.
pub enum FunctionExpr {
/// Output dtype is `IDX_DTYPE`.
NullCount,
/// Carries an `f64` payload (presumably the exponent -- TODO confirm);
/// the output dtype is resolved through `float_dtype()`.
Pow(f64),
/// Hash the input series. The `usize` is the seed passed to
/// `ahash::RandomState::with_seed`; the output dtype is `UInt64`.
/// Only compiled when the `row_hash` feature is enabled.
#[cfg(feature = "row_hash")]
Hash(usize),
}

impl FunctionExpr {
Expand All @@ -34,6 +36,8 @@ impl FunctionExpr {
match self {
NullCount => with_dtype(IDX_DTYPE),
Pow(_) => float_dtype(),
#[cfg(feature = "row_hash")]
Hash(_) => with_dtype(DataType::UInt64),
}
}
}
Expand Down Expand Up @@ -62,6 +66,14 @@ impl From<FunctionExpr> for NoEq<Arc<dyn SeriesUdf>> {
};
wrap!(f)
}
#[cfg(feature = "row_hash")]
Hash(seed) => {
let f = move |s: &mut [Series]| {
let s = &s[0];
Ok(s.hash(ahash::RandomState::with_seed(seed)).into_series())
};
wrap!(f)
}
}
}
}
6 changes: 6 additions & 0 deletions polars/polars-lazy/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1836,6 +1836,12 @@ impl Expr {
})
}

#[cfg(feature = "row_hash")]
/// Compute the hash of every element.
///
/// `seed` is stored in `FunctionExpr::Hash` and later used to seed the
/// `ahash::RandomState` that performs the hashing. The resulting column
/// has dtype `UInt64`.
///
/// Only available when the `row_hash` feature is enabled.
pub fn hash(self, seed: usize) -> Expr {
// Registered as a private mapped function under the name "hash".
self.map_private(FunctionExpr::Hash(seed), "hash")
}

#[cfg(feature = "strings")]
pub fn str(self) -> string::StringNameSpace {
string::StringNameSpace(self)
Expand Down
14 changes: 5 additions & 9 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1781,24 +1781,20 @@ def is_between(
"include_bounds should be a boolean or [boolean, boolean]."
)

def hash(self, k0: int = 0, k1: int = 1, k2: int = 2, k3: int = 3) -> "Expr":
def hash(self, seed: int = 0, **kwargs: Any) -> "Expr":
"""
Hash the Series.
The hash value is of type `UInt64`.
Parameters
----------
k0
seed parameter
k1
seed parameter
k2
seed parameter
k3
seed
seed parameter
"""
return wrap_expr(self._pyexpr.hash(k0, k1, k2, k3))
# kwargs is for backward compatibility
# can be removed later
return wrap_expr(self._pyexpr.hash(seed))

def reinterpret(self, signed: bool) -> "Expr":
"""
Expand Down
13 changes: 3 additions & 10 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -890,16 +890,6 @@ impl PyExpr {
// Calls `dot` on a clone of the wrapped expression, passing `other`'s
// wrapped expression, and converts the result back into a `PyExpr`.
pub fn dot(&self, other: PyExpr) -> PyExpr {
self.inner.clone().dot(other.inner).into()
}
// Four-seed hash binding: hashes the series with an `ahash::RandomState`
// built from `k0..k3` and declares the output dtype as `UInt64`.
pub fn hash(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> PyExpr {
// The closure captures the four seeds by value (`move`) and builds the
// hasher state each time it is invoked.
let function = move |s: Series| {
let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
Ok(s.hash(hb).into_series())
};
// Applied through the generic `map`, with the output type stated
// explicitly via `GetOutput::from_type`.
self.clone()
.inner
.map(function, GetOutput::from_type(DataType::UInt64))
.into()
}

pub fn reinterpret(&self, signed: bool) -> PyExpr {
let function = move |s: Series| reinterpret(&s, signed);
Expand Down Expand Up @@ -1371,6 +1361,9 @@ impl PyExpr {
// Forwards `base` to `entropy` on a clone of the wrapped expression and
// converts the result back into `Self`.
pub fn entropy(&self, base: f64) -> Self {
self.inner.clone().entropy(base).into()
}
// Single-seed hash binding: forwards `seed` to `hash` on a clone of the
// wrapped expression and converts the result back into `Self`.
pub fn hash(&self, seed: usize) -> Self {
self.inner.clone().hash(seed).into()
}
}

impl From<dsl::Expr> for PyExpr {
Expand Down

0 comments on commit 6ef6271

Please sign in to comment.