Skip to content

Commit

Permalink
add DataFrame <-> DataFrame arithmetic
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 27, 2021
1 parent 263754a commit 21dbfb3
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 4 deletions.
1 change: 1 addition & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ abs = ["polars-core/abs", "polars-lazy/abs"]
dynamic_groupby = ["polars-core/dynamic_groupby", "polars-lazy/dynamic_groupby"]
ewma = ["polars-core/ewma", "polars-lazy/ewma"]
dot_diagram = ["polars-lazy/dot_diagram"]
dataframe_arithmetic = ["polars-core/dataframe_arithmetic"]

# don't use this
private = ["polars-lazy/private"]
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ diagonal_concat = []
horizontal_concat = []
abs = []
ewma = ["polars-utils"]
dataframe_arithmetic = []

dynamic_groupby = ["polars-time", "dtype-datetime", "dtype-date"]

Expand Down Expand Up @@ -125,6 +126,7 @@ docs-selection = [
"diagonal_concat",
"horizontal_concat",
"abs",
"dataframe_arithmetic",
]

[dependencies]
Expand Down
90 changes: 90 additions & 0 deletions polars/polars-core/src/frame/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,93 @@ impl Rem<&Series> for DataFrame {
(&self).rem(rhs)
}
}

impl DataFrame {
fn binary_aligned(
&self,
other: &DataFrame,
f: &(dyn Fn(&Series, &Series) -> Result<Series> + Sync + Send),
) -> Result<DataFrame> {
let max_len = std::cmp::max(self.height(), other.height());
let max_width = std::cmp::max(self.width(), other.width());
let mut cols = self
.get_columns()
.par_iter()
.zip(other.get_columns().par_iter())
.map(|(l, r)| {
let diff_l = max_len - l.len();
let diff_r = max_len - r.len();

let st = get_supertype(l.dtype(), r.dtype())?;
let mut l = l.cast(&st)?;
let mut r = r.cast(&st)?;

if diff_l > 0 {
l = l.extend(AnyValue::Null, diff_l)?;
};
if diff_r > 0 {
r = r.extend(AnyValue::Null, diff_r)?;
};

f(&l, &r)
})
.collect::<Result<Vec<_>>>()?;

let col_len = cols.len();
if col_len < max_width {
let df = if col_len < self.width() { self } else { other };

for i in col_len..max_len {
let s = &df.get_columns()[i];
let name = s.name();
let dtype = s.dtype();

// trick to fill a series with nulls
let vals: &[Option<i32>] = &[None];
let s = Series::new(name, vals).cast(dtype)?;
cols.push(s.expand_at_index(0, max_len))
}
}
DataFrame::new(cols)
}
}

impl Add<&DataFrame> for &DataFrame {
type Output = Result<DataFrame>;

fn add(self, rhs: &DataFrame) -> Self::Output {
self.binary_aligned(rhs, &|a, b| Ok(a + b))
}
}

impl Sub<&DataFrame> for &DataFrame {
type Output = Result<DataFrame>;

fn sub(self, rhs: &DataFrame) -> Self::Output {
self.binary_aligned(rhs, &|a, b| Ok(a - b))
}
}

impl Div<&DataFrame> for &DataFrame {
type Output = Result<DataFrame>;

fn div(self, rhs: &DataFrame) -> Self::Output {
self.binary_aligned(rhs, &|a, b| Ok(a / b))
}
}

impl Mul<&DataFrame> for &DataFrame {
type Output = Result<DataFrame>;

fn mul(self, rhs: &DataFrame) -> Self::Output {
self.binary_aligned(rhs, &|a, b| Ok(a * b))
}
}

impl Rem<&DataFrame> for &DataFrame {
type Output = Result<DataFrame>;

fn rem(self, rhs: &DataFrame) -> Self::Output {
self.binary_aligned(rhs, &|a, b| Ok(a % b))
}
}
1 change: 1 addition & 0 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use crate::utils::{
accumulate_dataframes_horizontal, accumulate_dataframes_vertical, split_ca, split_df, NoNull,
};

#[cfg(feature = "dataframe_arithmetic")]
mod arithmetic;
#[cfg(feature = "asof_join")]
pub(crate) mod asof_join;
Expand Down
1 change: 1 addition & 0 deletions polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
//! - `row_hash` - Utility to hash DataFrame rows to UInt64Chunked
//! - `diagonal_concat` - Concat diagonally thereby combining different schemas.
//! - `horizontal_concat` - Concat horizontally and extend with null values if lengths don't match
//! - `dataframe_arithmetic` - Arithmetic on (Dataframe and DataFrames) and (DataFrame on Series)
//! * `Series` operations:
//! - `is_in` - [Check for membership in `Series`](crate::chunked_array::ops::IsIn)
//! - `zip_with` - [Zip two Series/ ChunkedArrays](crate::chunked_array::ops::ChunkZip)
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ features = [
"abs",
"ewma",
"dot_diagram",
"dataframe_arithmetic",
]

# [patch.crates-io]
Expand Down
34 changes: 30 additions & 4 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1059,22 +1059,48 @@ def __getstate__(self): # type: ignore
def __setstate__(self, state): # type: ignore
self._df = DataFrame(state)._df

def __mul__(self, other: Any) -> "DataFrame":
def __mul__(
self, other: Union["DataFrame", "pli.Series", int, float, bool]
) -> "DataFrame":
if isinstance(other, DataFrame):
return wrap_df(self._df.mul_df(other._df))

other = _prepare_other_arg(other)
return wrap_df(self._df.mul(other._s))

def __truediv__(self, other: Any) -> "DataFrame":
def __truediv__(
self, other: Union["DataFrame", "pli.Series", int, float, bool]
) -> "DataFrame":
if isinstance(other, DataFrame):
return wrap_df(self._df.div_df(other._df))

other = _prepare_other_arg(other)
return wrap_df(self._df.div(other._s))

def __add__(self, other: Any) -> "DataFrame":
def __add__(
self, other: Union["DataFrame", "pli.Series", int, float, bool]
) -> "DataFrame":
if isinstance(other, DataFrame):
return wrap_df(self._df.add_df(other._df))
other = _prepare_other_arg(other)
return wrap_df(self._df.add(other._s))

def __sub__(self, other: Any) -> "DataFrame":
def __sub__(
self, other: Union["DataFrame", "pli.Series", int, float, bool]
) -> "DataFrame":
if isinstance(other, DataFrame):
return wrap_df(self._df.sub_df(other._df))
other = _prepare_other_arg(other)
return wrap_df(self._df.sub(other._s))

def __mod__(
self, other: Union["DataFrame", "pli.Series", int, float, bool]
) -> "DataFrame":
if isinstance(other, DataFrame):
return wrap_df(self._df.rem_df(other._df))
other = _prepare_other_arg(other)
return wrap_df(self._df.rem(other._s))

def __str__(self) -> str:
return self._df.as_str()

Expand Down
25 changes: 25 additions & 0 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,31 @@ impl PyDataFrame {
Ok(df.into())
}

pub fn add_df(&self, s: &Self) -> PyResult<Self> {
let df = (&self.df + &s.df).map_err(PyPolarsEr::from)?;
Ok(df.into())
}

pub fn sub_df(&self, s: &Self) -> PyResult<Self> {
let df = (&self.df - &s.df).map_err(PyPolarsEr::from)?;
Ok(df.into())
}

pub fn div_df(&self, s: &Self) -> PyResult<Self> {
let df = (&self.df / &s.df).map_err(PyPolarsEr::from)?;
Ok(df.into())
}

pub fn mul_df(&self, s: &Self) -> PyResult<Self> {
let df = (&self.df * &s.df).map_err(PyPolarsEr::from)?;
Ok(df.into())
}

pub fn rem_df(&self, s: &Self) -> PyResult<Self> {
let df = (&self.df % &s.df).map_err(PyPolarsEr::from)?;
Ok(df.into())
}

pub fn sample_n(&self, n: usize, with_replacement: bool, seed: u64) -> PyResult<Self> {
let df = self
.df
Expand Down
36 changes: 36 additions & 0 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1602,6 +1602,42 @@ def test_arithmetic() -> None:
expected = pl.DataFrame({"a": [-1, 0], "b": [1, 2]})
assert df_minus.frame_equal(expected)

df_mod = df % 2
expected = pl.DataFrame({"a": [1.0, 0.0], "b": [1.0, 0.0]})
assert df_mod.frame_equal(expected)

df2 = pl.DataFrame({"c": [10]})

out = df + df2
expected = pl.DataFrame({"a": [11.0, None], "b": [None, None]}).with_column(
pl.col("b").cast(pl.Float64)
)
assert out.frame_equal(expected, null_equal=True)

out = df - df2
expected = pl.DataFrame({"a": [-9.0, None], "b": [None, None]}).with_column(
pl.col("b").cast(pl.Float64)
)
assert out.frame_equal(expected, null_equal=True)

out = df / df2
expected = pl.DataFrame({"a": [0.1, None], "b": [None, None]}).with_column(
pl.col("b").cast(pl.Float64)
)
assert out.frame_equal(expected, null_equal=True)

out = df * df2
expected = pl.DataFrame({"a": [10.0, None], "b": [None, None]}).with_column(
pl.col("b").cast(pl.Float64)
)
assert out.frame_equal(expected, null_equal=True)

out = df % df2
expected = pl.DataFrame({"a": [1.0, None], "b": [None, None]}).with_column(
pl.col("b").cast(pl.Float64)
)
assert out.frame_equal(expected, null_equal=True)


def test_getattr() -> None:
df = pl.DataFrame({"a": [1.0, 2.0]})
Expand Down

0 comments on commit 21dbfb3

Please sign in to comment.