add DataFrame <-> DataFrame arithmetic

pola-rs · Dec 27, 2021 · 21dbfb3 · 21dbfb3
1 parent 263754a
commit 21dbfb3
Show file tree

Hide file tree

Showing 9 changed files with 187 additions and 4 deletions.
diff --git a/polars/Cargo.toml b/polars/Cargo.toml
@@ -99,6 +99,7 @@ abs = ["polars-core/abs", "polars-lazy/abs"]
 dynamic_groupby = ["polars-core/dynamic_groupby", "polars-lazy/dynamic_groupby"]
 ewma = ["polars-core/ewma", "polars-lazy/ewma"]
 dot_diagram = ["polars-lazy/dot_diagram"]
+dataframe_arithmetic = ["polars-core/dataframe_arithmetic"]
 
 # don't use this
 private = ["polars-lazy/private"]

diff --git a/polars/polars-core/Cargo.toml b/polars/polars-core/Cargo.toml
@@ -74,6 +74,7 @@ diagonal_concat = []
 horizontal_concat = []
 abs = []
 ewma = ["polars-utils"]
+dataframe_arithmetic = []
 
 dynamic_groupby = ["polars-time", "dtype-datetime", "dtype-date"]
 
@@ -125,6 +126,7 @@ docs-selection = [
   "diagonal_concat",
   "horizontal_concat",
   "abs",
+  "dataframe_arithmetic",
 ]
 
 [dependencies]

diff --git a/polars/polars-core/src/frame/arithmetic.rs b/polars/polars-core/src/frame/arithmetic.rs
@@ -104,3 +104,93 @@ impl Rem<&Series> for DataFrame {
         (&self).rem(rhs)
     }
 }
+
+impl DataFrame {
+    fn binary_aligned(
+        &self,
+        other: &DataFrame,
+        f: &(dyn Fn(&Series, &Series) -> Result<Series> + Sync + Send),
+    ) -> Result<DataFrame> {
+        let max_len = std::cmp::max(self.height(), other.height());
+        let max_width = std::cmp::max(self.width(), other.width());
+        let mut cols = self
+            .get_columns()
+            .par_iter()
+            .zip(other.get_columns().par_iter())
+            .map(|(l, r)| {
+                let diff_l = max_len - l.len();
+                let diff_r = max_len - r.len();
+
+                let st = get_supertype(l.dtype(), r.dtype())?;
+                let mut l = l.cast(&st)?;
+                let mut r = r.cast(&st)?;
+
+                if diff_l > 0 {
+                    l = l.extend(AnyValue::Null, diff_l)?;
+                };
+                if diff_r > 0 {
+                    r = r.extend(AnyValue::Null, diff_r)?;
+                };
+
+                f(&l, &r)
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let col_len = cols.len();
+        if col_len < max_width {
+            let df = if col_len < self.width() { self } else { other };
+
+            for i in col_len..max_len {
+                let s = &df.get_columns()[i];
+                let name = s.name();
+                let dtype = s.dtype();
+
+                // trick to fill a series with nulls
+                let vals: &[Option<i32>] = &[None];
+                let s = Series::new(name, vals).cast(dtype)?;
+                cols.push(s.expand_at_index(0, max_len))
+            }
+        }
+        DataFrame::new(cols)
+    }
+}
+
+impl Add<&DataFrame> for &DataFrame {
+    type Output = Result<DataFrame>;
+
+    fn add(self, rhs: &DataFrame) -> Self::Output {
+        self.binary_aligned(rhs, &|a, b| Ok(a + b))
+    }
+}
+
+impl Sub<&DataFrame> for &DataFrame {
+    type Output = Result<DataFrame>;
+
+    fn sub(self, rhs: &DataFrame) -> Self::Output {
+        self.binary_aligned(rhs, &|a, b| Ok(a - b))
+    }
+}
+
+impl Div<&DataFrame> for &DataFrame {
+    type Output = Result<DataFrame>;
+
+    fn div(self, rhs: &DataFrame) -> Self::Output {
+        self.binary_aligned(rhs, &|a, b| Ok(a / b))
+    }
+}
+
+impl Mul<&DataFrame> for &DataFrame {
+    type Output = Result<DataFrame>;
+
+    fn mul(self, rhs: &DataFrame) -> Self::Output {
+        self.binary_aligned(rhs, &|a, b| Ok(a * b))
+    }
+}
+
+impl Rem<&DataFrame> for &DataFrame {
+    type Output = Result<DataFrame>;
+
+    fn rem(self, rhs: &DataFrame) -> Self::Output {
+        self.binary_aligned(rhs, &|a, b| Ok(a % b))
+    }
+}
diff --git a/polars/polars-core/src/frame/mod.rs b/polars/polars-core/src/frame/mod.rs
@@ -17,6 +17,7 @@ use crate::utils::{
     accumulate_dataframes_horizontal, accumulate_dataframes_vertical, split_ca, split_df, NoNull,
 };
 
+#[cfg(feature = "dataframe_arithmetic")]
 mod arithmetic;
 #[cfg(feature = "asof_join")]
 pub(crate) mod asof_join;

diff --git a/polars/src/lib.rs b/polars/src/lib.rs
@@ -124,6 +124,7 @@
 //!     - `row_hash` - Utility to hash DataFrame rows to UInt64Chunked
 //!     - `diagonal_concat` - Concat diagonally thereby combining different schemas.
 //!     - `horizontal_concat` - Concat horizontally and extend with null values if lengths don't match
+//!     - `dataframe_arithmetic` - Arithmetic on (Dataframe and DataFrames) and (DataFrame on Series)
 //! * `Series` operations:
 //!     - `is_in` - [Check for membership in `Series`](crate::chunked_array::ops::IsIn)
 //!     - `zip_with` - [Zip two Series/ ChunkedArrays](crate::chunked_array::ops::ChunkZip)

diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml
@@ -85,6 +85,7 @@ features = [
   "abs",
   "ewma",
   "dot_diagram",
+  "dataframe_arithmetic",
 ]
 
 # [patch.crates-io]

diff --git a/py-polars/polars/internals/frame.py b/py-polars/polars/internals/frame.py
@@ -1059,22 +1059,48 @@ def __getstate__(self):  # type: ignore
     def __setstate__(self, state):  # type: ignore
         self._df = DataFrame(state)._df
 
-    def __mul__(self, other: Any) -> "DataFrame":
+    def __mul__(
+        self, other: Union["DataFrame", "pli.Series", int, float, bool]
+    ) -> "DataFrame":
+        if isinstance(other, DataFrame):
+            return wrap_df(self._df.mul_df(other._df))
+
         other = _prepare_other_arg(other)
         return wrap_df(self._df.mul(other._s))
 
-    def __truediv__(self, other: Any) -> "DataFrame":
+    def __truediv__(
+        self, other: Union["DataFrame", "pli.Series", int, float, bool]
+    ) -> "DataFrame":
+        if isinstance(other, DataFrame):
+            return wrap_df(self._df.div_df(other._df))
+
         other = _prepare_other_arg(other)
         return wrap_df(self._df.div(other._s))
 
-    def __add__(self, other: Any) -> "DataFrame":
+    def __add__(
+        self, other: Union["DataFrame", "pli.Series", int, float, bool]
+    ) -> "DataFrame":
+        if isinstance(other, DataFrame):
+            return wrap_df(self._df.add_df(other._df))
         other = _prepare_other_arg(other)
         return wrap_df(self._df.add(other._s))
 
-    def __sub__(self, other: Any) -> "DataFrame":
+    def __sub__(
+        self, other: Union["DataFrame", "pli.Series", int, float, bool]
+    ) -> "DataFrame":
+        if isinstance(other, DataFrame):
+            return wrap_df(self._df.sub_df(other._df))
         other = _prepare_other_arg(other)
         return wrap_df(self._df.sub(other._s))
 
+    def __mod__(
+        self, other: Union["DataFrame", "pli.Series", int, float, bool]
+    ) -> "DataFrame":
+        if isinstance(other, DataFrame):
+            return wrap_df(self._df.rem_df(other._df))
+        other = _prepare_other_arg(other)
+        return wrap_df(self._df.rem(other._s))
+
     def __str__(self) -> str:
         return self._df.as_str()
 

diff --git a/py-polars/src/dataframe.rs b/py-polars/src/dataframe.rs
@@ -396,6 +396,31 @@ impl PyDataFrame {
         Ok(df.into())
     }
 
+    pub fn add_df(&self, s: &Self) -> PyResult<Self> {
+        let df = (&self.df + &s.df).map_err(PyPolarsEr::from)?;
+        Ok(df.into())
+    }
+
+    pub fn sub_df(&self, s: &Self) -> PyResult<Self> {
+        let df = (&self.df - &s.df).map_err(PyPolarsEr::from)?;
+        Ok(df.into())
+    }
+
+    pub fn div_df(&self, s: &Self) -> PyResult<Self> {
+        let df = (&self.df / &s.df).map_err(PyPolarsEr::from)?;
+        Ok(df.into())
+    }
+
+    pub fn mul_df(&self, s: &Self) -> PyResult<Self> {
+        let df = (&self.df * &s.df).map_err(PyPolarsEr::from)?;
+        Ok(df.into())
+    }
+
+    pub fn rem_df(&self, s: &Self) -> PyResult<Self> {
+        let df = (&self.df % &s.df).map_err(PyPolarsEr::from)?;
+        Ok(df.into())
+    }
+
     pub fn sample_n(&self, n: usize, with_replacement: bool, seed: u64) -> PyResult<Self> {
         let df = self
             .df

diff --git a/py-polars/tests/test_df.py b/py-polars/tests/test_df.py
@@ -1602,6 +1602,42 @@ def test_arithmetic() -> None:
     expected = pl.DataFrame({"a": [-1, 0], "b": [1, 2]})
     assert df_minus.frame_equal(expected)
 
+    df_mod = df % 2
+    expected = pl.DataFrame({"a": [1.0, 0.0], "b": [1.0, 0.0]})
+    assert df_mod.frame_equal(expected)
+
+    df2 = pl.DataFrame({"c": [10]})
+
+    out = df + df2
+    expected = pl.DataFrame({"a": [11.0, None], "b": [None, None]}).with_column(
+        pl.col("b").cast(pl.Float64)
+    )
+    assert out.frame_equal(expected, null_equal=True)
+
+    out = df - df2
+    expected = pl.DataFrame({"a": [-9.0, None], "b": [None, None]}).with_column(
+        pl.col("b").cast(pl.Float64)
+    )
+    assert out.frame_equal(expected, null_equal=True)
+
+    out = df / df2
+    expected = pl.DataFrame({"a": [0.1, None], "b": [None, None]}).with_column(
+        pl.col("b").cast(pl.Float64)
+    )
+    assert out.frame_equal(expected, null_equal=True)
+
+    out = df * df2
+    expected = pl.DataFrame({"a": [10.0, None], "b": [None, None]}).with_column(
+        pl.col("b").cast(pl.Float64)
+    )
+    assert out.frame_equal(expected, null_equal=True)
+
+    out = df % df2
+    expected = pl.DataFrame({"a": [1.0, None], "b": [None, None]}).with_column(
+        pl.col("b").cast(pl.Float64)
+    )
+    assert out.frame_equal(expected, null_equal=True)
+
 
 def test_getattr() -> None:
     df = pl.DataFrame({"a": [1.0, 2.0]})