Skip to content

Commit

Permalink
perf(rust, python): use hash as CSE Identifier (#10385)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 9, 2023
1 parent 079efac commit 70b7bf9
Show file tree
Hide file tree
Showing 8 changed files with 292 additions and 44 deletions.
111 changes: 91 additions & 20 deletions crates/polars-core/src/datatypes/any_value.rs
Expand Up @@ -527,32 +527,103 @@ impl From<AnyValue<'_>> for DataType {
}
}

// NOTE(review): this span is a rendered diff hunk without +/- markers. The two
// lines below look like the pre-change header that the `hash_impl` header
// further down replaces — confirm against the repository before editing.
impl<'a> Hash for AnyValue<'a> {
fn hash<H: Hasher>(&self, state: &mut H) {
impl AnyValue<'_> {
/// Feeds this value into `state`: first the enum discriminant, then a
/// per-variant payload.
///
/// When `cheap` is `true`, the `List`, `Array`, `Struct` and `StructOwned`
/// arms hash a pointer address instead of the contained data. When `cheap`
/// is `false` those arms hash the data itself (through `Wrap`, or element
/// by element for `StructOwned`).
pub fn hash_impl<H: Hasher>(&self, state: &mut H, cheap: bool) {
use AnyValue::*;
std::mem::discriminant(self).hash(state);
match self {
// NOTE(review): the arms from here through `_ => unimplemented!()` appear to
// be the removed side of the diff; the arms after it cover the same variants
// again using `.hash(state)`.
Null => state.write_u64(u64::MAX / 2 + 135123),
Int8(v) => state.write_i8(*v),
Int16(v) => state.write_i16(*v),
Int32(v) => state.write_i32(*v),
Int64(v) => state.write_i64(*v),
UInt8(v) => state.write_u8(*v),
UInt16(v) => state.write_u16(*v),
UInt32(v) => state.write_u32(*v),
UInt64(v) => state.write_u64(*v),
Utf8(v) => state.write(v.as_bytes()),
Utf8Owned(v) => state.write(v.as_bytes()),
Float32(v) => state.write_u32(v.to_bits()),
Float64(v) => state.write_u64(v.to_bits()),
Binary(v) => state.write(v),
BinaryOwned(v) => state.write(v),
Boolean(v) => state.write_u8(*v as u8),
List(v) => Hash::hash(&Wrap(v.clone()), state),
_ => unimplemented!(),
Int8(v) => v.hash(state),
Int16(v) => v.hash(state),
Int32(v) => v.hash(state),
Int64(v) => v.hash(state),
UInt8(v) => v.hash(state),
UInt16(v) => v.hash(state),
UInt32(v) => v.hash(state),
UInt64(v) => v.hash(state),
Utf8(v) => v.hash(state),
Utf8Owned(v) => v.hash(state),
// Floats are hashed through their native-endian byte representation.
Float32(v) => v.to_ne_bytes().hash(state),
Float64(v) => v.to_ne_bytes().hash(state),
Binary(v) => v.hash(state),
BinaryOwned(v) => v.hash(state),
Boolean(v) => v.hash(state),
List(v) => {
if cheap {
// Hash the address `v` refers to (as `usize`), not the list contents.
let ptr = v as *const _ as usize;
ptr.hash(state);
} else {
// Hash a clone of `v` through the `Wrap` newtype.
Hash::hash(&Wrap(v.clone()), state)
}
}
#[cfg(feature = "dtype-array")]
Array(v, width) => {
if cheap {
// Same address-based shortcut as the `List` arm.
let ptr = v as *const _ as usize;
ptr.hash(state);
} else {
Hash::hash(&Wrap(v.clone()), state)
}
// The width is hashed in both modes.
width.hash(state)
}
#[cfg(feature = "dtype-date")]
Date(v) => v.hash(state),
#[cfg(feature = "dtype-datetime")]
Datetime(v, tu, tz) => {
v.hash(state);
tu.hash(state);
tz.hash(state);
}
#[cfg(feature = "dtype-duration")]
Duration(v, tz) => {
v.hash(state);
tz.hash(state);
}
#[cfg(feature = "dtype-time")]
Time(v) => v.hash(state),
// Only the first field of the variant takes part in the hash.
#[cfg(feature = "dtype-categorical")]
Categorical(v, _, _) => v.hash(state),
// Object payloads contribute nothing beyond the discriminant hashed above.
#[cfg(feature = "object")]
Object(_) => {}
#[cfg(feature = "object")]
ObjectOwned(_) => {}
#[cfg(feature = "dtype-struct")]
Struct(_, arr, _) => {
if cheap {
// Hash the raw pointer to the `StructArray` itself.
let ptr = &(**arr) as *const StructArray;
ptr.hash(state)
} else {
// Build a `Series` with an empty name from the boxed array and hash it
// through `Wrap`; the `unwrap` panics if that conversion fails.
let s = Series::try_from(("", arr.to_boxed())).unwrap();
Hash::hash(&Wrap(s), state)
}
}
#[cfg(feature = "dtype-struct")]
StructOwned(bx) => {
if cheap {
// Hash the address `bx` refers to (as `usize`).
let ptr = bx as *const Box<_> as usize;
ptr.hash(state);
} else {
// Hash every element of the boxed tuple's first field in order;
// `cheap` is `false` on this branch and is passed through unchanged.
let arr = &bx.as_ref().0;
for av in arr {
av.hash_impl(state, cheap);
}
}
}
#[cfg(feature = "dtype-decimal")]
Decimal(v, k) => {
v.hash(state);
k.hash(state);
}
// No payload: only the discriminant hashed above identifies `Null`.
Null => {}
}
}
}

impl<'a> Hash for AnyValue<'a> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.hash_impl(state, false)
}
}

impl<'a> Eq for AnyValue<'a> {}

impl<'a, T> From<Option<T>> for AnyValue<'a>
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/function_expr/fused.rs
Expand Up @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
use super::*;

#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Copy, Clone, PartialEq, Debug)]
#[derive(Copy, Clone, PartialEq, Debug, Hash)]
pub enum FusedOperator {
MultiplyAdd,
SubMultiply,
Expand Down
28 changes: 28 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Expand Up @@ -53,6 +53,7 @@ mod trigonometry;
mod unique;

use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
#[cfg(feature = "random")]
use std::sync::atomic::AtomicU64;

Expand Down Expand Up @@ -236,6 +237,33 @@ pub enum FunctionExpr {
SetSortedFlag(IsSorted),
}

impl Hash for FunctionExpr {
fn hash<H: Hasher>(&self, state: &mut H) {
std::mem::discriminant(self).hash(state);
match self {
FunctionExpr::BinaryExpr(f) => f.hash(state),
FunctionExpr::Boolean(f) => f.hash(state),
#[cfg(feature = "strings")]
FunctionExpr::StringExpr(f) => f.hash(state),
#[cfg(feature = "random")]
FunctionExpr::Random { method, .. } => method.hash(state),
#[cfg(feature = "range")]
FunctionExpr::Range(f) => f.hash(state),
#[cfg(feature = "temporal")]
FunctionExpr::TemporalExpr(f) => f.hash(state),
#[cfg(feature = "trigonometry")]
FunctionExpr::Trigonometry(f) => f.hash(state),
#[cfg(feature = "fused")]
FunctionExpr::Fused(f) => f.hash(state),
#[cfg(feature = "interpolate")]
FunctionExpr::Interpolate(f) => f.hash(state),
#[cfg(feature = "dtype-categorical")]
FunctionExpr::Categorical(f) => f.hash(state),
_ => {}
}
}
}

impl Display for FunctionExpr {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use FunctionExpr::*;
Expand Down
6 changes: 6 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/random.rs
Expand Up @@ -23,6 +23,12 @@ pub enum RandomMethod {
},
}

/// Hashes only which variant this is; the fields carried by a variant do not
/// take part in the hash.
impl Hash for RandomMethod {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let variant = std::mem::discriminant(self);
        variant.hash(state);
    }
}

pub(super) fn random(
s: &Series,
method: RandomMethod,
Expand Down
27 changes: 27 additions & 0 deletions crates/polars-plan/src/logical_plan/aexpr/hash.rs
@@ -0,0 +1,27 @@
use std::hash::{Hash, Hasher};

use crate::prelude::AExpr;

impl Hash for AExpr {
// This hashes the variant, not the whole expression
fn hash<H: Hasher>(&self, state: &mut H) {
std::mem::discriminant(self).hash(state);

match self {
AExpr::Column(name) => name.hash(state),
AExpr::Alias(_, name) => name.hash(state),
AExpr::Nth(v) => v.hash(state),
AExpr::Literal(lv) => lv.hash(state),
AExpr::Function {
options, function, ..
} => {
options.hash(state);
function.hash(state)
}
AExpr::AnonymousFunction { options, .. } => {
options.hash(state);
}
_ => {}
}
}
}
1 change: 1 addition & 0 deletions crates/polars-plan/src/logical_plan/aexpr/mod.rs
@@ -1,3 +1,4 @@
mod hash;
mod schema;

use std::sync::Arc;
Expand Down
12 changes: 12 additions & 0 deletions crates/polars-plan/src/logical_plan/lit.rs
@@ -1,3 +1,5 @@
use std::hash::{Hash, Hasher};

#[cfg(feature = "temporal")]
use polars_core::export::chrono::{Duration as ChronoDuration, NaiveDate, NaiveDateTime};
use polars_core::prelude::*;
Expand Down Expand Up @@ -307,3 +309,13 @@ impl Literal for LiteralValue {
/// Turns `t` into an `Expr` by calling its `Literal::lit` implementation.
pub fn lit<L: Literal>(t: L) -> Expr {
    L::lit(t)
}

/// Hashes a literal through its `AnyValue` form when `to_anyvalue` yields one.
///
/// The conversion result is hashed with `hash_impl` in `cheap` mode. Literals
/// for which `to_anyvalue` returns `None` all hash the constant `0`.
impl Hash for LiteralValue {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self.to_anyvalue() {
            Some(av) => av.hash_impl(state, true),
            None => 0.hash(state),
        }
    }
}

0 comments on commit 70b7bf9

Please sign in to comment.