Skip to content

Commit

Permalink
feat(python): Series.get_chunks (#5701)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 2, 2022
1 parent afb755b commit 1e603be
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 1 deletion.
12 changes: 12 additions & 0 deletions polars/polars-core/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,18 @@ fn flatten_df(df: &DataFrame) -> impl Iterator<Item = DataFrame> + '_ {
}
})
}

/// Split a `Series` into one `Series` per underlying chunk.
///
/// Every returned `Series` is built from exactly one chunk of `s` and reuses
/// the name and dtype of `s`. The output order follows the order of
/// `s.chunks()`.
pub fn flatten_series(s: &Series) -> Vec<Series> {
    let series_name = s.name();
    let series_dtype = s.dtype();

    let mut pieces = Vec::with_capacity(s.chunks().len());
    for chunk in s.chunks().iter() {
        // SAFETY: the chunk and the dtype are both taken from the same `s`.
        // NOTE(review): this presumes the unchecked constructor only requires
        // the chunks to agree with the supplied dtype -- confirm against its docs.
        let piece = unsafe {
            Series::from_chunks_and_dtype_unchecked(series_name, vec![chunk.clone()], series_dtype)
        };
        pieces.push(piece);
    }
    pieces
}

pub fn split_df_as_ref(df: &DataFrame, n: usize) -> PolarsResult<Vec<DataFrame>> {
let total_len = df.height();
let chunk_size = total_len / n;
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/miscellaneous.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ Miscellaneous
Series.series_equal
Series.set_sorted
Series.to_physical
Series.get_chunks
4 changes: 4 additions & 0 deletions py-polars/polars/internals/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4800,6 +4800,10 @@ def shrink_dtype(self) -> Series:
This can be used to reduce memory pressure.
"""

def get_chunks(self) -> list[Series]:
    """
    Get the chunks of this Series as a list of Series.

    Returns
    -------
    list of Series
        Whatever the wrapped ``self._s.get_chunks()`` call produces.

    """
    chunks: list[Series] = self._s.get_chunks()
    return chunks

# Below are the namespaces defined. Do not move these up in the definition of
# Series, as it confuses mypy between the type annotation `str` and the
# namespace `str`
Expand Down
11 changes: 10 additions & 1 deletion py-polars/src/series.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use numpy::PyArray1;
use polars_core::prelude::QuantileInterpolOptions;
use polars_core::series::IsSorted;
use polars_core::utils::CustomIterTools;
use polars_core::utils::{flatten_series, CustomIterTools};
use pyo3::exceptions::{PyRuntimeError, PyValueError};
use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyTuple};
Expand Down Expand Up @@ -1099,6 +1099,15 @@ impl PySeries {
Err(e) => Err(PyErr::from(PyPolarsErr::from(e))),
}
}
/// Return every chunk of the wrapped series as a separate Python object.
///
/// The series is split with `flatten_series`; each piece is wrapped in a new
/// `PySeries` and handed to the `wrap_s` callable looked up on
/// `py_modules::POLARS` (presumably this turns it into a Python-level
/// `Series` -- verify against the Python package).
///
/// # Errors
///
/// Returns the Python error if the `wrap_s` attribute cannot be looked up, or
/// the first error raised by a `wrap_s` call.
pub fn get_chunks(&self) -> PyResult<Vec<PyObject>> {
    Python::with_gil(|py| {
        // Propagate a failed attribute lookup as a Python exception rather than
        // panicking: this function already returns `PyResult`.
        let wrap_s = py_modules::POLARS.getattr(py, "wrap_s")?;
        flatten_series(&self.series)
            .into_iter()
            .map(|s| wrap_s.call1(py, (Self::new(s),)))
            // Collecting into `PyResult<Vec<_>>` stops at the first failed call.
            .collect()
    })
}
}

macro_rules! impl_ufuncs {
Expand Down
8 changes: 8 additions & 0 deletions py-polars/tests/unit/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2440,3 +2440,11 @@ def test_from_epoch_expr(

expected = pl.Series("timestamp", [exp, None]).cast(exp_type)
assert_series_equal(result, expected)


def test_get_chunks() -> None:
    """Check that ``get_chunks`` returns the inputs of a non-rechunked concat."""
    first = pl.Series("a", [1, 2])
    second = pl.Series("a", [3, 4])

    # rechunk=False is expected to leave the two inputs as separate chunks.
    combined = pl.concat([first, second], rechunk=False)
    parts = combined.get_chunks()

    assert parts[0].series_equal(first)
    assert parts[1].series_equal(second)

0 comments on commit 1e603be

Please sign in to comment.