Skip to content

Commit

Permalink
feat(rust, python): allow regex and wildcard in groupby (#5425)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 4, 2022
1 parent 041143e commit 50d94d2
Show file tree
Hide file tree
Showing 18 changed files with 40 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/deploy_manylinux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ls -la
rm py-polars/README.md
cp README.md py-polars/README.md
cd py-polars
rustup override set nightly-2022-10-24
rustup override set nightly-2022-11-03
export RUSTFLAGS='-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma'

# first the default release
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03

- name: Cache Rust
uses: Swatinem/rust-cache@v2
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/build-test-rust.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03

- name: Cache Rust
uses: Swatinem/rust-cache@v2
Expand All @@ -58,7 +58,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03
components: rustfmt, clippy, miri

- name: Cache Rust
Expand Down Expand Up @@ -102,7 +102,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03

- name: Cache Rust
uses: Swatinem/rust-cache@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/create-py-release-mac-universal2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03

- name: Set up Rust targets
run: rustup target add aarch64-apple-darwin
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/create-py-release-manylinux-lts-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
MATURIN_PASSWORD: ${{ secrets.PYPI_PASS }}
RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt --cfg use_mimalloc
with:
rust-toolchain: nightly-2022-10-24
rust-toolchain: nightly-2022-11-03
maturin-version: '0.13.5'
command: publish
args: -m py-polars/Cargo.toml --skip-existing -o wheels -u ritchie46
6 changes: 3 additions & 3 deletions .github/workflows/create-py-release-manylinux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
MATURIN_PASSWORD: ${{ secrets.PYPI_PASS }}
RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma
with:
rust-toolchain: nightly-2022-10-24
rust-toolchain: nightly-2022-11-03
maturin-version: '0.13.5'
command: publish
args: -m py-polars/Cargo.toml --skip-existing -o wheels -u ritchie46
Expand All @@ -53,7 +53,7 @@ jobs:
env:
MATURIN_PASSWORD: ${{ secrets.PYPI_PASS }}
with:
rust-toolchain: nightly-2022-10-24
rust-toolchain: nightly-2022-11-03
target: aarch64-unknown-linux-gnu
maturin-version: '0.13.5'
command: publish
Expand Down Expand Up @@ -84,7 +84,7 @@ jobs:
MATURIN_PASSWORD: ${{ secrets.PYPI_PASS }}
RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma
with:
rust-toolchain: nightly-2022-10-24
rust-toolchain: nightly-2022-11-03
maturin-version: '0.13.5'
command: publish
args: -m py-polars/Cargo.toml --skip-existing -o wheels -u ritchie46
2 changes: 1 addition & 1 deletion .github/workflows/create-py-release-windows-macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
MATURIN_PASSWORD: ${{ secrets.PYPI_PASS }}
RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+sse4.1,+sse4.2
with:
rust-toolchain: nightly-2022-10-24
rust-toolchain: nightly-2022-11-03
maturin-version: '0.13.5'
command: publish
args: -m py-polars/Cargo.toml --no-sdist --skip-existing -o wheels -i python -u ritchie46
2 changes: 1 addition & 1 deletion .github/workflows/docs-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03
components: rust-docs

- name: Cache Rust
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint-python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03
components: rustfmt, clippy
- name: Cache Rust
uses: Swatinem/rust-cache@v2
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03

- name: Cache Rust
uses: Swatinem/rust-cache@v2
Expand Down Expand Up @@ -112,7 +112,7 @@ jobs:
- name: Set up Rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly-2022-10-24
toolchain: nightly-2022-11-03

- name: Cache Rust
uses: Swatinem/rust-cache@v2
Expand Down
5 changes: 4 additions & 1 deletion polars/polars-core/src/frame/cross_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ impl DataFrame {
) -> PolarsResult<(DataFrame, DataFrame)> {
let n_rows_left = self.height() as IdxSize;
let n_rows_right = other.height() as IdxSize;
let total_rows = n_rows_right * n_rows_left;
let Some(total_rows) = n_rows_left.checked_mul(n_rows_right) else {
return Err(PolarsError::ComputeError("Cross joins would produce more rows than fits into 2^32.\n\
Consider compiling with polars-big-idx feature, or set 'allow_streaming'.".into()))
};

// the left side has the Nth row combined with every row from right.
// So let's say we have the following no. of rows
Expand Down
5 changes: 3 additions & 2 deletions polars/polars-lazy/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ impl LogicalPlanBuilder {

pub fn groupby<E: AsRef<[Expr]>>(
self,
keys: Arc<Vec<Expr>>,
keys: Vec<Expr>,
aggs: E,
apply: Option<Arc<dyn DataFrameUdf>>,
maintain_order: bool,
Expand All @@ -374,6 +374,7 @@ impl LogicalPlanBuilder {
) -> Self {
let current_schema = try_delayed!(self.0.schema(), &self.0, into);
let current_schema = current_schema.as_ref();
let keys = rewrite_projections(keys, current_schema, &[]);
let aggs = rewrite_projections(aggs.as_ref().to_vec(), current_schema, keys.as_ref());

let mut schema = try_delayed!(
Expand Down Expand Up @@ -422,7 +423,7 @@ impl LogicalPlanBuilder {

LogicalPlan::Aggregate {
input: Box::new(self.0),
keys,
keys: Arc::new(keys),
aggs,
schema: Arc::new(schema),
apply,
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1249,7 +1249,7 @@ impl LazyGroupBy {
#[cfg(feature = "dynamic_groupby")]
let lp = LogicalPlanBuilder::from(self.logical_plan)
.groupby(
Arc::new(self.keys),
self.keys,
aggs,
None,
self.maintain_order,
Expand All @@ -1260,7 +1260,7 @@ impl LazyGroupBy {

#[cfg(not(feature = "dynamic_groupby"))]
let lp = LogicalPlanBuilder::from(self.logical_plan)
.groupby(Arc::new(self.keys), aggs, None, self.maintain_order)
.groupby(self.keys, aggs, None, self.maintain_order)
.build();
LazyFrame::from_logical_plan(lp, self.opt_state)
}
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@ impl PyDataFrame {
}

pub fn select(&self, selection: Vec<&str>) -> PyResult<Self> {
let df = self.df.select(&selection).map_err(PyPolarsErr::from)?;
let df = self.df.select(selection).map_err(PyPolarsErr::from)?;
Ok(PyDataFrame::new(df))
}

Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ pub fn get_mmap_bytes_reader<'a>(py_f: &'a PyAny) -> PyResult<Box<dyn MmapBytesR
let s = pstring.to_string();
let p = std::path::Path::new(&s);
let p = resolve_homedir(p);
let f = match File::open(&p) {
let f = match File::open(p) {
Ok(file) => file,
Err(_e) => {
return Err(PyErr::new::<PyFileNotFoundError, _>(format!(
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,7 @@ impl PyExpr {
.into()
}
pub fn exclude(&self, columns: Vec<String>) -> PyExpr {
self.inner.clone().exclude(&columns).into()
self.inner.clone().exclude(columns).into()
}
pub fn exclude_dtype(&self, dtypes: Vec<Wrap<DataType>>) -> PyExpr {
// Safety:
Expand Down
12 changes: 12 additions & 0 deletions py-polars/tests/unit/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,15 @@ def test_take_in_groupby() -> None:
assert df.groupby("group").agg(
pl.col("values").take(1) - pl.col("values").take(2)
).sort("group").to_dict(False) == {"group": [1, 2], "values": [197, 494]}


def test_groupby_wildcard() -> None:
    """Group by the wildcard expression ``pl.col("*")`` and aggregate one column.

    The frame has two columns, ``a`` and ``b``. Using the wildcard as the
    groupby key is expected to yield both columns as keys, alongside the
    suffixed aggregate ``a_agg``.
    """
    frame = pl.DataFrame({"a": [1, 2], "b": [1, 2]})

    group_keys = [pl.col("*")]
    aggregations = [pl.col("a").first().suffix("_agg")]

    # maintain_order=True keeps the groups in input order so the result
    # can be compared against a fixed dictionary.
    grouped = frame.groupby(group_keys, maintain_order=True)
    result = grouped.agg(aggregations).to_dict(False)

    expected = {"a": [1, 2], "b": [1, 2], "a_agg": [1, 2]}
    assert result == expected
2 changes: 1 addition & 1 deletion rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[toolchain]
channel = "nightly-2022-10-24"
channel = "nightly-2022-11-03"

0 comments on commit 50d94d2

Please sign in to comment.