Skip to content

Commit

Permalink
feat!: follow SQL in null behavior for JOINS (#12840)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 2, 2023
1 parent f6b7491 commit 34330ec
Show file tree
Hide file tree
Showing 42 changed files with 561 additions and 272 deletions.
14 changes: 7 additions & 7 deletions crates/polars-arrow/src/compute/arity.rs
Expand Up @@ -2,7 +2,7 @@

use polars_error::PolarsResult;

use super::utils::{check_same_len, combine_validities};
use super::utils::{check_same_len, combine_validities_and};
use crate::array::PrimitiveArray;
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::datatypes::ArrowDataType;
Expand Down Expand Up @@ -126,7 +126,7 @@ where
// the iteration, then the validity is changed to None to mark the value
// as Null
let bitmap: Bitmap = mut_bitmap.into();
let validity = combine_validities(array.validity(), Some(&bitmap));
let validity = combine_validities_and(array.validity(), Some(&bitmap));

PrimitiveArray::<O>::new(data_type, values, validity)
}
Expand Down Expand Up @@ -158,7 +158,7 @@ where
{
check_same_len(lhs, rhs).unwrap();

let validity = combine_validities(lhs.validity(), rhs.validity());
let validity = combine_validities_and(lhs.validity(), rhs.validity());

let values = lhs
.values()
Expand Down Expand Up @@ -186,7 +186,7 @@ where
{
check_same_len(lhs, rhs)?;

let validity = combine_validities(lhs.validity(), rhs.validity());
let validity = combine_validities_and(lhs.validity(), rhs.validity());

let values = lhs
.values()
Expand Down Expand Up @@ -214,7 +214,7 @@ where
{
check_same_len(lhs, rhs).unwrap();

let validity = combine_validities(lhs.validity(), rhs.validity());
let validity = combine_validities_and(lhs.validity(), rhs.validity());

let mut mut_bitmap = MutableBitmap::with_capacity(lhs.len());

Expand Down Expand Up @@ -272,13 +272,13 @@ where
.into();

let bitmap: Bitmap = mut_bitmap.into();
let validity = combine_validities(lhs.validity(), rhs.validity());
let validity = combine_validities_and(lhs.validity(), rhs.validity());

// The validity has to be checked against the bitmap created during the
// creation of the values with the iterator. If an error was found during
// the iteration, then the validity is changed to None to mark the value
// as Null
let validity = combine_validities(validity.as_ref(), Some(&bitmap));
let validity = combine_validities_and(validity.as_ref(), Some(&bitmap));

PrimitiveArray::<T>::new(data_type, values, validity)
}
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/compute/boolean.rs
@@ -1,5 +1,5 @@
//! null-preserving operators such as [`and`], [`or`] and [`not`].
use super::utils::combine_validities;
use super::utils::combine_validities_and;
use crate::array::{Array, BooleanArray};
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::datatypes::ArrowDataType;
Expand All @@ -23,7 +23,7 @@ where
F: Fn(&Bitmap, &Bitmap) -> Bitmap,
{
assert_lengths(lhs, rhs);
let validity = combine_validities(lhs.validity(), rhs.validity());
let validity = combine_validities_and(lhs.validity(), rhs.validity());

let left_buffer = lhs.values();
let right_buffer = rhs.values();
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/compute/mod.rs
Expand Up @@ -46,4 +46,4 @@ pub mod take;
#[cfg(feature = "compute_temporal")]
#[cfg_attr(docsrs, doc(cfg(feature = "compute_temporal")))]
pub mod temporal;
mod utils;
pub mod utils;
18 changes: 13 additions & 5 deletions crates/polars-arrow/src/compute/utils.rs
@@ -1,14 +1,22 @@
use std::ops::{BitAnd, BitOr};

use polars_error::{polars_bail, polars_ensure, PolarsResult};

use crate::array::Array;
use crate::bitmap::Bitmap;

pub fn combine_validities(lhs: Option<&Bitmap>, rhs: Option<&Bitmap>) -> Option<Bitmap> {
match (lhs, rhs) {
(Some(lhs), None) => Some(lhs.clone()),
(None, Some(rhs)) => Some(rhs.clone()),
pub fn combine_validities_and(opt_l: Option<&Bitmap>, opt_r: Option<&Bitmap>) -> Option<Bitmap> {
match (opt_l, opt_r) {
(Some(l), Some(r)) => Some(l.bitand(r)),
(None, Some(r)) => Some(r.clone()),
(Some(l), None) => Some(l.clone()),
(None, None) => None,
(Some(lhs), Some(rhs)) => Some(lhs & rhs),
}
}
pub fn combine_validities_or(opt_l: Option<&Bitmap>, opt_r: Option<&Bitmap>) -> Option<Bitmap> {
match (opt_l, opt_r) {
(Some(l), Some(r)) => Some(l.bitor(r)),
_ => None,
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/legacy/compute/mod.rs
@@ -1,6 +1,6 @@
use crate::array::PrimitiveArray;
use crate::compute::utils::combine_validities_and;
use crate::datatypes::ArrowDataType;
use crate::legacy::utils::combine_validities_and;
use crate::types::NativeType;

pub mod arithmetics;
Expand Down
18 changes: 1 addition & 17 deletions crates/polars-arrow/src/legacy/utils.rs
@@ -1,7 +1,5 @@
use std::ops::{BitAnd, BitOr};

use crate::array::PrimitiveArray;
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::bitmap::MutableBitmap;
use crate::datatypes::ArrowDataType;
use crate::legacy::bit_util::unset_bit_raw;
use crate::legacy::trusted_len::{FromIteratorReversed, TrustedLen, TrustedLenPush};
Expand Down Expand Up @@ -51,20 +49,6 @@ where
}
}

pub fn combine_validities_and(opt_l: Option<&Bitmap>, opt_r: Option<&Bitmap>) -> Option<Bitmap> {
match (opt_l, opt_r) {
(Some(l), Some(r)) => Some(l.bitand(r)),
(None, Some(r)) => Some(r.clone()),
(Some(l), None) => Some(l.clone()),
(None, None) => None,
}
}
pub fn combine_validities_or(opt_l: Option<&Bitmap>, opt_r: Option<&Bitmap>) -> Option<Bitmap> {
match (opt_l, opt_r) {
(Some(l), Some(r)) => Some(l.bitor(r)),
_ => None,
}
}
unsafe impl<I, J> crate::trusted_len::TrustedLen for TrustMyLength<I, J> where I: Iterator<Item = J> {}

pub trait CustomIterTools: Iterator {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/arithmetic/mod.rs
Expand Up @@ -8,7 +8,7 @@ use std::ops::{Add, Div, Mul, Rem, Sub};
use arrow::array::PrimitiveArray;
use arrow::compute::arithmetics::basic;
use arrow::compute::arity_assign;
use arrow::legacy::utils::combine_validities_and;
use arrow::compute::utils::combine_validities_and;
use arrow::types::NativeType;
use num_traits::{Num, NumCast, ToPrimitive, Zero};
pub(super) use numeric::arithmetic_helper;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/bitwise.rs
@@ -1,8 +1,8 @@
use std::ops::{BitAnd, BitOr, BitXor, Not};

use arrow::compute;
use arrow::compute::utils::combine_validities_and;
use arrow::legacy::compute::bitwise;
use arrow::legacy::utils::combine_validities_and;

use super::arithmetic::arithmetic_helper;
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/arity.rs
@@ -1,7 +1,7 @@
use std::error::Error;

use arrow::array::Array;
use arrow::legacy::utils::combine_validities_and;
use arrow::compute::utils::combine_validities_and;

use crate::datatypes::{ArrayCollectIterExt, ArrayFromIter, StaticArray};
use crate::prelude::{ChunkedArray, PolarsDataType};
Expand Down
14 changes: 10 additions & 4 deletions crates/polars-core/src/datatypes/any_value.rs
Expand Up @@ -785,9 +785,9 @@ impl<'a> From<AnyValue<'a>> for Option<i64> {
}
}

impl PartialEq for AnyValue<'_> {
impl AnyValue<'_> {
#[inline]
fn eq(&self, other: &Self) -> bool {
pub fn eq_missing(&self, other: &Self, null_equal: bool) -> bool {
use AnyValue::*;
match (self, other) {
(UInt8(l), UInt8(r)) => *l == *r,
Expand All @@ -809,8 +809,7 @@ impl PartialEq for AnyValue<'_> {
(BinaryOwned(l), BinaryOwned(r)) => l == r,
(Binary(l), BinaryOwned(r)) => l == r,
(BinaryOwned(l), Binary(r)) => l == r,
// should it?
(Null, Null) => true,
(Null, Null) => null_equal,
#[cfg(feature = "dtype-time")]
(Time(l), Time(r)) => *l == *r,
#[cfg(all(feature = "dtype-datetime", feature = "dtype-date"))]
Expand Down Expand Up @@ -856,6 +855,13 @@ impl PartialEq for AnyValue<'_> {
}
}

impl PartialEq for AnyValue<'_> {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.eq_missing(other, true)
}
}

impl PartialOrd for AnyValue<'_> {
/// Only implemented for the same types and physical types!
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-core/src/datatypes/mod.rs
Expand Up @@ -35,6 +35,7 @@ use num_traits::{Bounded, FromPrimitive, Num, NumCast, One, Zero};
use polars_utils::abs_diff::AbsDiff;
use polars_utils::float::IsFloat;
use polars_utils::min_max::MinMax;
use polars_utils::nulls::IsNull;
#[cfg(feature = "serde")]
use serde::de::{EnumAccess, Error, Unexpected, VariantAccess, Visitor};
#[cfg(any(feature = "serde", feature = "serde-lazy"))]
Expand Down Expand Up @@ -261,6 +262,7 @@ pub trait NumericNative:
+ IsFloat
+ ArrayArithmetics
+ MinMax
+ IsNull
{
type PolarsType: PolarsNumericType;
}
Expand Down
9 changes: 9 additions & 0 deletions crates/polars-lazy/src/frame/mod.rs
Expand Up @@ -1791,6 +1791,7 @@ pub struct JoinBuilder {
force_parallel: bool,
suffix: Option<String>,
validation: JoinValidation,
join_nulls: bool,
}
impl JoinBuilder {
/// Create the `JoinBuilder` with the provided `LazyFrame` as the left table.
Expand All @@ -1803,6 +1804,7 @@ impl JoinBuilder {
right_on: vec![],
allow_parallel: true,
force_parallel: false,
join_nulls: false,
suffix: None,
validation: Default::default(),
}
Expand Down Expand Up @@ -1863,6 +1865,12 @@ impl JoinBuilder {
self
}

/// Join on null values. By default null values will never produce matches.
pub fn join_nulls(mut self, join_nulls: bool) -> Self {
self.join_nulls = join_nulls;
self
}

/// Suffix to add duplicate column names in join.
/// Defaults to `"_right"` if this method is never called.
pub fn suffix<S: AsRef<str>>(mut self, suffix: S) -> Self {
Expand All @@ -1883,6 +1891,7 @@ impl JoinBuilder {
validation: self.validation,
suffix: self.suffix,
slice: None,
join_nulls: self.join_nulls,
};

let lp = self
Expand Down
7 changes: 5 additions & 2 deletions crates/polars-lazy/src/physical_plan/expressions/window.rs
Expand Up @@ -563,13 +563,16 @@ impl PhysicalExpr for WindowExpr {
// group key from right column
let right = &keys[0];
group_by_columns[0]
.hash_join_left(right, JoinValidation::ManyToMany)
.hash_join_left(right, JoinValidation::ManyToMany, true)
.unwrap()
.1
} else {
let df_right = DataFrame::new_no_checks(keys);
let df_left = DataFrame::new_no_checks(group_by_columns);
private_left_join_multiple_keys(&df_left, &df_right, None, None).1
private_left_join_multiple_keys(
&df_left, &df_right, None, None, false,
)
.1
}
};

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/list/sets.rs
Expand Up @@ -6,7 +6,7 @@ use arrow::array::{
PrimitiveArray, Utf8Array,
};
use arrow::bitmap::Bitmap;
use arrow::legacy::utils::combine_validities_and;
use arrow::compute::utils::combine_validities_and;
use arrow::offset::OffsetsBuffer;
use arrow::types::NativeType;
use polars_core::prelude::*;
Expand Down
95 changes: 0 additions & 95 deletions crates/polars-ops/src/frame/hashing.rs

This file was deleted.

0 comments on commit 34330ec

Please sign in to comment.