Skip to content

Commit

Permalink
python: concat also Series
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 4, 2021
1 parent c1177ef commit c9eb93e
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 12 deletions.
4 changes: 2 additions & 2 deletions py-polars/polars/eager/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,10 +1441,10 @@ def describe_cast(self: "DataFrame") -> "DataFrame":
describe_cast(self.median()),
]
)
summary.insert_at_idx(
summary.insert_at_idx( # type: ignore
0, pl.Series("describe", ["mean", "std", "min", "max", "median"])
)
return summary
return summary # type: ignore

def replace_at_idx(self, index: int, series: "pl.Series") -> None:
"""
Expand Down
25 changes: 16 additions & 9 deletions py-polars/polars/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

try:
from polars.polars import concat_df as _concat_df
from polars.polars import concat_series as _concat_series

_DOCUMENTING = False
except ImportError:
Expand All @@ -36,25 +37,31 @@ def get_dummies(df: "pl.DataFrame") -> "pl.DataFrame":
return df.to_dummies()


def concat(dfs: Sequence["pl.DataFrame"], rechunk: bool = True) -> "pl.DataFrame":
def concat(
items: Union[Sequence["pl.DataFrame"], Sequence["pl.Series"]], rechunk: bool = True
) -> Union["pl.DataFrame", "pl.Series"]:
"""
Aggregate all the Dataframes in a List of DataFrames to a single DataFrame.
Aggregate all the Dataframes/Series in a List of DataFrames/Series to a single DataFrame/Series.
Parameters
----------
dfs
DataFrames to concatenate.
items
DataFrames/Series to concatenate.
rechunk
rechunk the final DataFrame.
rechunk the final DataFrame/Series.
"""
if not len(dfs) > 0:
if not len(items) > 0:
raise ValueError("cannot concat empty list")

df = pl.wrap_df(_concat_df(dfs))
out: Union["pl.Series", "pl.DataFrame"]
if isinstance(items[0], pl.DataFrame):
out = pl.wrap_df(_concat_df(items))
else:
out = pl.wrap_s(_concat_series(items))

if rechunk:
return df.rechunk()
return df
return out.rechunk() # type: ignore
return out


def repeat(
Expand Down
5 changes: 5 additions & 0 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ pub(crate) fn get_df(obj: &PyAny) -> PyResult<DataFrame> {
Ok(pydf.extract::<PyDataFrame>()?.df)
}

/// Pull the Rust `Series` out of a Python-side Series wrapper object.
///
/// Looks up the `_s` attribute on `obj`, extracts it as a `PySeries`, and
/// returns that wrapper's `series` field.
///
/// # Errors
///
/// Propagates the Python error raised when the `_s` attribute lookup fails
/// or when the attribute cannot be extracted as a `PySeries`.
pub(crate) fn get_series(obj: &PyAny) -> PyResult<Series> {
    obj.getattr("_s")?
        .extract::<PySeries>()
        .map(|wrapper| wrapper.series)
}

impl<'a, T> FromPyObject<'a> for Wrap<ChunkedArray<T>>
where
T: PyPolarsPrimitiveType,
Expand Down
19 changes: 18 additions & 1 deletion py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub mod prelude;
pub mod series;
pub mod utils;

use crate::conversion::{get_df, get_pyseq, Wrap};
use crate::conversion::{get_df, get_pyseq, get_series, Wrap};
use crate::error::PyPolarsEr;
use crate::file::get_either_file;
use crate::prelude::DataType;
Expand Down Expand Up @@ -143,6 +143,22 @@ fn concat_df(dfs: &PyAny) -> PyResult<PyDataFrame> {
Ok(df.into())
}

#[pyfunction]
fn concat_series(series: &PyAny) -> PyResult<PySeries> {
let (seq, _len) = get_pyseq(series)?;
let mut iter = seq.iter()?;
let first = iter.next().unwrap()?;

let mut s = get_series(first)?;

for res in iter {
let item = res?;
let item = get_series(item)?;
s.append(&item).map_err(PyPolarsEr::from)?;
}
Ok(s.into())
}

#[pyfunction]
fn ipc_schema(py: Python, py_f: PyObject) -> PyResult<PyObject> {
let metadata = match get_either_file(py_f, false)? {
Expand Down Expand Up @@ -184,6 +200,7 @@ fn polars(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(concat_str)).unwrap();
m.add_wrapped(wrap_pyfunction!(concat_lst)).unwrap();
m.add_wrapped(wrap_pyfunction!(concat_df)).unwrap();
m.add_wrapped(wrap_pyfunction!(concat_series)).unwrap();
m.add_wrapped(wrap_pyfunction!(ipc_schema)).unwrap();
Ok(())
}
8 changes: 8 additions & 0 deletions py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ def test_init_inputs():
pl.Series("bigint", [2 ** 64])


def test_concat():
    """Concatenating a Series with itself yields six values and leaves the input untouched."""
    series = pl.Series("a", [2, 1, 3])
    combined = pl.concat([series, series])

    assert combined.len() == 6
    # the input Series must keep its original length after the concat
    assert series.len() == 3


def test_to_frame():
assert create_series().to_frame().shape == (2, 1)

Expand Down

0 comments on commit c9eb93e

Please sign in to comment.