Skip to content

Commit

Permalink
don't allow predicate pushdown if compared column is being coerced (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed May 21, 2022
1 parent 5f89ade commit 2243ecf
Show file tree
Hide file tree
Showing 11 changed files with 55 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/create-py-mac-universal2-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
- name: Install latest Rust nightly
uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2022-04-01
toolchain: nightly-2022-05-20
override: true
components: rustfmt, clippy
- name: Setup universal2 targets for Rust
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/create-py-release-windows-macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
rm py-polars/README.md
cp README.md py-polars/README.md
cd py-polars
rustup override set nightly-2022-04-01
rustup override set nightly-2022-05-20
export RUSTFLAGS='-C target-feature=+fxsr,+sse,+sse2,+sse3'
maturin publish \
--no-sdist \
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
pip install -r py-polars/build.requirements.txt
- name: Run formatting checks
run: |
cd py-polars && black --check . && blackdoc --check . && isort --check . && rustup override set nightly-2022-04-01 && cargo fmt --all -- --check && cd ..
cd py-polars && black --check . && blackdoc --check . && isort --check . && rustup override set nightly-2022-05-20 && cargo fmt --all -- --check && cd ..
- name: Run linting
run: |
cd py-polars && flake8 && cd ..
Expand All @@ -40,7 +40,7 @@ jobs:
- name: Run tests
run: |
export RUSTFLAGS="-C debuginfo=0"
cd py-polars && rustup override set nightly-2022-04-01 && make venv && make test-with-cov
cd py-polars && rustup override set nightly-2022-05-20 && make venv && make test-with-cov
cargo clippy
- name: Check doc examples
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-windows-python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
shell: bash
run: |
export RUSTFLAGS="-C debuginfo=0"
cd py-polars && rustup override set nightly-2022-04-01 && make build-and-test-no-venv
cd py-polars && rustup override set nightly-2022-05-01 && make build-and-test-no-venv
cargo clippy
# test if we can import polars without any requirements
- name: Import polars
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ fmt_toml:

coverage:
@bash -c "\
rustup override set nightly-2022-04-01; \
rustup override set nightly-2022-05-20; \
source <(cargo llvm-cov show-env --export-prefix); \
export CARGO_TARGET_DIR=\$$CARGO_LLVM_COV_TARGET_DIR; \
export CARGO_INCREMENTAL=1; \
Expand Down
8 changes: 8 additions & 0 deletions polars/polars-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,14 @@ pub(crate) static STRING_CACHE: Lazy<StringCache> = Lazy::new(Default::default);
// utility for the tests to ensure a single thread can execute
pub static SINGLE_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));

#[cfg(feature = "dtype-categorical")]
pub fn with_string_cache<F: FnOnce() -> T, T>(func: F) -> T {
toggle_string_cache(true);
let out = func();
toggle_string_cache(false);
out
}

/// Use a global string cache for the Categorical Types.
///
/// This is used to cache the string categories locally.
Expand Down
12 changes: 10 additions & 2 deletions polars/polars-lazy/src/logical_plan/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,16 @@ impl fmt::Debug for Expr {
}
}
Cast {
expr, data_type, ..
} => write!(f, "{:?}.cast({:?})", expr, data_type),
expr,
data_type,
strict,
} => {
if *strict {
write!(f, "{:?}.strict_cast({:?})", expr, data_type)
} else {
write!(f, "{:?}.cast({:?})", expr, data_type)
}
}
Ternary {
predicate,
truthy,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ pub(super) fn project_other_column_is_predicate_pushdown_boundary(
| AExpr::AnonymousFunction {options: FunctionOptions { collect_groups: ApplyOptions::ApplyGroups, .. }, ..}
| AExpr::Function {options: FunctionOptions { collect_groups: ApplyOptions::ApplyGroups, .. }, ..}
| AExpr::BinaryExpr {..}
| AExpr::Cast {data_type: DataType::Float32 | DataType::Float64, ..}
// cast may create nulls
| AExpr::Cast {strict: false, ..}
// casts may produce null values, change values etc.
// they can fail in myriad ways
| AExpr::Cast {..}
// still need to investigate this one
| AExpr::Explode {..}
// A groupby needs all rows for aggregation
Expand Down
1 change: 0 additions & 1 deletion polars/tests/it/lazy/expressions/is_in.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use super::*;

#[test]
#[cfg(feature = "is_in")]
fn test_is_in() -> Result<()> {
let df = df![
"x" => [1, 2, 3],
Expand Down
1 change: 1 addition & 0 deletions polars/tests/it/lazy/expressions/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod apply;
mod arity;
mod expand;
#[cfg(feature = "is_in")]
mod is_in;
mod slice;
mod window;
Expand Down
27 changes: 27 additions & 0 deletions polars/tests/it/lazy/predicate_queries.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::*;
use polars_core::{with_string_cache, SINGLE_LOCK};

#[test]
fn test_predicate_after_renaming() -> Result<()> {
Expand Down Expand Up @@ -137,3 +138,29 @@ fn test_filter_block_join() -> Result<()> {

Ok(())
}

#[test]
#[cfg(all(feature = "is_in", feature = "dtype-categorical"))]
fn test_is_in_categorical_3420() -> Result<()> {
let df = df![
"a" => ["a", "b", "c", "d", "e"],
"b" => [1, 2, 3, 4, 5]
]?;

let _guard = SINGLE_LOCK.lock();

let out: Result<_> = with_string_cache(|| {
let s = Series::new("x", ["a", "b", "c"]).strict_cast(&DataType::Categorical(None))?;
df.lazy()
.with_column(col("a").strict_cast(DataType::Categorical(None)))
.filter(col("a").is_in(lit(s).alias("x")))
.collect()
});
let mut expected = df![
"a" => ["a", "b", "c"],
"b" => [1, 2, 3]
]?;
expected.try_apply("a", |s| s.cast(&DataType::Categorical(None)))?;
assert!(out?.frame_equal(&expected));
Ok(())
}

0 comments on commit 2243ecf

Please sign in to comment.