Skip to content

Commit

Permalink
depr(python): Rename series_equal/frame_equal to equals (#12618)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Nov 23, 2023
1 parent fa1a66b commit 1694283
Show file tree
Hide file tree
Showing 23 changed files with 201 additions and 141 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Miscellaneous

DataFrame.apply
DataFrame.corr
DataFrame.equals
DataFrame.frame_equal
DataFrame.lazy
DataFrame.map_rows
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/miscellaneous.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Miscellaneous
:toctree: api/

Series.apply
Series.equals
Series.map_elements
Series.reinterpret
Series.series_equal
Expand Down
31 changes: 26 additions & 5 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4578,9 +4578,9 @@ def bottom_k(
)
)

def frame_equal(self, other: DataFrame, *, null_equal: bool = True) -> bool:
def equals(self, other: DataFrame, *, null_equal: bool = True) -> bool:
"""
Check if DataFrame is equal to other.
Check whether the DataFrame is equal to another DataFrame.
Parameters
----------
Expand All @@ -4589,6 +4589,10 @@ def frame_equal(self, other: DataFrame, *, null_equal: bool = True) -> bool:
null_equal
Consider null values as equal.
See Also
--------
assert_frame_equal
Examples
--------
>>> df1 = pl.DataFrame(
Expand All @@ -4605,13 +4609,13 @@ def frame_equal(self, other: DataFrame, *, null_equal: bool = True) -> bool:
... "ham": ["c", "b", "a"],
... }
... )
>>> df1.frame_equal(df1)
>>> df1.equals(df1)
True
>>> df1.frame_equal(df2)
>>> df1.equals(df2)
False
"""
return self._df.frame_equal(other._df, null_equal)
return self._df.equals(other._df, null_equal)

@deprecate_function(
"DataFrame.replace is deprecated and will be removed in a future version. "
Expand Down Expand Up @@ -10477,6 +10481,23 @@ def replace_at_idx(self, index: int, new_column: Series) -> Self:
"""
return self.replace_column(index, new_column)

@deprecate_renamed_function("equals", version="0.19.16")
def frame_equal(self, other: DataFrame, *, null_equal: bool = True) -> bool:
"""
Check whether the DataFrame is equal to another DataFrame.

.. deprecated:: 0.19.16
    This method has been renamed to :meth:`equals`.

Parameters
----------
other
    DataFrame to compare with.
null_equal
    Consider null values as equal.
"""
return self.equals(other, null_equal=null_equal)


def _prepare_other_arg(other: Any, length: int | None = None) -> Series:
# if not a series create singleton series such that it will broadcast
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,9 @@ def truncate(
│ 2001-01-01 18:00:00 │
│ 2001-01-01 22:00:00 │
└─────────────────────┘
>>> df.select(pl.col("datetime").dt.truncate("1h")).frame_equal(
... df.select(pl.col("datetime").dt.truncate(timedelta(hours=1)))
... )
>>> truncate_str = df.select(pl.col("datetime").dt.truncate("1h"))
>>> truncate_td = df.select(pl.col("datetime").dt.truncate(timedelta(hours=1)))
>>> truncate_str.equals(truncate_td)
True
>>> df = pl.datetime_range(
Expand Down
4 changes: 3 additions & 1 deletion py-polars/polars/series/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,9 @@ def round(
2001-01-01 19:00:00
2001-01-01 22:00:00
]
>>> s.dt.round("1h").series_equal(s.dt.round(timedelta(hours=1)))
>>> round_str = s.dt.round("1h")
>>> round_td = s.dt.round(timedelta(hours=1))
>>> round_str.equals(round_td)
True
>>> start = datetime(2001, 1, 1)
Expand Down
39 changes: 32 additions & 7 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3737,11 +3737,11 @@ def explode(self) -> Series:
"""

def series_equal(
def equals(
self, other: Series, *, null_equal: bool = True, strict: bool = False
) -> bool:
"""
Check if series is equal with another Series.
Check whether the Series is equal to another Series.
Parameters
----------
Expand All @@ -3753,17 +3753,20 @@ def series_equal(
Don't allow different numerical dtypes, e.g. comparing `pl.UInt32` with a
`pl.Int64` will return `False`.
See Also
--------
assert_series_equal
Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s1 = pl.Series("a", [1, 2, 3])
>>> s2 = pl.Series("b", [4, 5, 6])
>>> s.series_equal(s)
>>> s1.equals(s1)
True
>>> s.series_equal(s2)
>>> s1.equals(s2)
False
"""
return self._s.series_equal(other._s, null_equal, strict)
return self._s.equals(other._s, null_equal, strict)

def len(self) -> int:
"""
Expand Down Expand Up @@ -7169,6 +7172,28 @@ def map_dict(
"""
return self.replace(mapping, default=default, return_dtype=return_dtype)

@deprecate_renamed_function("equals", version="0.19.16")
def series_equal(
self, other: Series, *, null_equal: bool = True, strict: bool = False
) -> bool:
"""
Check whether the Series is equal to another Series.

.. deprecated:: 0.19.16
    This method has been renamed to :meth:`equals`.

Parameters
----------
other
    Series to compare with.
null_equal
    Consider null values as equal.
strict
    Don't allow different numerical dtypes, e.g. comparing `pl.UInt32` with a
    `pl.Int64` will return `False`.
"""
return self.equals(other, null_equal=null_equal, strict=strict)

# Keep the `list` and `str` properties below at the end of the definition of Series,
# as to not confuse mypy with the type annotation `str` and `list`

Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1125,7 +1125,7 @@ impl PyDataFrame {
Ok(mask.into_series().into())
}

pub fn frame_equal(&self, other: &PyDataFrame, null_equal: bool) -> bool {
pub fn equals(&self, other: &PyDataFrame, null_equal: bool) -> bool {
if null_equal {
self.df.frame_equal_missing(&other.df)
} else {
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ impl PySeries {
self.series.has_validity()
}

fn series_equal(&self, other: &PySeries, null_equal: bool, strict: bool) -> bool {
fn equals(&self, other: &PySeries, null_equal: bool, strict: bool) -> bool {
if strict && (self.series.dtype() != other.series.dtype()) {
return false;
}
Expand Down
48 changes: 1 addition & 47 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ def test_to_dummies_drop_first() -> None:

assert dd.columns == ["foo_1", "foo_2", "bar_4", "bar_5", "baz_y", "baz_z"]
assert set(dm.columns) - set(dd.columns) == {"foo_0", "bar_3", "baz_x"}
assert dm.select(dd.columns).frame_equal(dd)
assert_frame_equal(dm.select(dd.columns), dd)
assert dd.rows() == [
(0, 0, 0, 0, 0, 0),
(1, 0, 1, 0, 1, 0),
Expand Down Expand Up @@ -3288,52 +3288,6 @@ def test_iter_slices() -> None:
assert batches[1].rows() == df[50:].rows()


def test_frame_equal() -> None:
"""Check how `DataFrame.frame_equal` treats values, names, dtypes and nulls."""
# Values are checked: df2 holds df1's values in reverse row order,
# so the two frames must not compare equal
df1 = pl.DataFrame(
{
"foo": [1, 2, 3],
"bar": [6.0, 7.0, 8.0],
"ham": ["a", "b", "c"],
}
)
df2 = pl.DataFrame(
{
"foo": [3, 2, 1],
"bar": [8.0, 7.0, 6.0],
"ham": ["c", "b", "a"],
}
)

assert df1.frame_equal(df1)
assert not df1.frame_equal(df2)

# Column names are checked: df3 holds df1's values under different names
df3 = pl.DataFrame(
{
"a": [1, 2, 3],
"b": [6.0, 7.0, 8.0],
"c": ["a", "b", "c"],
}
)
assert not df1.frame_equal(df3)

# Datatypes are NOT checked: casting a column to Int8 or Categorical
# still compares equal to the uncast frame
df = pl.DataFrame(
{
"foo": [1, 2, None],
"bar": [6.0, 7.0, None],
"ham": ["a", "b", None],
}
)
assert df.frame_equal(df.with_columns(pl.col("foo").cast(pl.Int8)))
assert df.frame_equal(df.with_columns(pl.col("ham").cast(pl.Categorical)))

# The null_equal parameter determines if None values are considered equal;
# when it is not passed, the nulls in df compare equal
assert df.frame_equal(df)
assert not df.frame_equal(df, null_equal=False)


def test_format_empty_df() -> None:
df = pl.DataFrame(
[
Expand Down
47 changes: 47 additions & 0 deletions py-polars/tests/unit/dataframe/test_equals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import polars as pl


def test_equals() -> None:
"""Check how `DataFrame.equals` treats values, names, dtypes and nulls."""
# Values are checked: df2 holds df1's values in reverse row order,
# so the two frames must not compare equal
df1 = pl.DataFrame(
{
"foo": [1, 2, 3],
"bar": [6.0, 7.0, 8.0],
"ham": ["a", "b", "c"],
}
)
df2 = pl.DataFrame(
{
"foo": [3, 2, 1],
"bar": [8.0, 7.0, 6.0],
"ham": ["c", "b", "a"],
}
)

assert df1.equals(df1) is True
assert df1.equals(df2) is False

# Column names are checked: df3 holds df1's values under different names
df3 = pl.DataFrame(
{
"a": [1, 2, 3],
"b": [6.0, 7.0, 8.0],
"c": ["a", "b", "c"],
}
)
assert df1.equals(df3) is False

# Datatypes are NOT checked: casting a column to Int8 or Categorical
# still compares equal to the uncast frame
df = pl.DataFrame(
{
"foo": [1, 2, None],
"bar": [6.0, 7.0, None],
"ham": ["a", "b", None],
}
)
assert df.equals(df.with_columns(pl.col("foo").cast(pl.Int8))) is True
assert df.equals(df.with_columns(pl.col("ham").cast(pl.Categorical))) is True

# The null_equal parameter determines if None values are considered equal;
# when it is not passed, the nulls in df compare equal
assert df.equals(df) is True
assert df.equals(df, null_equal=False) is False
17 changes: 8 additions & 9 deletions py-polars/tests/unit/datatypes/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,19 +397,18 @@ def test_list_any() -> None:


def test_list_min_max() -> None:
for dt in pl.NUMERIC_DTYPES:
if dt == pl.Decimal:
continue
for dt in pl.INTEGER_DTYPES | pl.FLOAT_DTYPES:
df = pl.DataFrame(
{"a": [[1], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5]]},
schema={"a": pl.List(dt)},
)
assert df.select(pl.col("a").list.min())["a"].series_equal(
df.select(pl.col("a").list.first())["a"]
)
assert df.select(pl.col("a").list.max())["a"].series_equal(
df.select(pl.col("a").list.last())["a"]
)
result = df.select(pl.col("a").list.min())
expected = df.select(pl.col("a").list.first())
assert_frame_equal(result, expected)

result = df.select(pl.col("a").list.max())
expected = df.select(pl.col("a").list.last())
assert_frame_equal(result, expected)

df = pl.DataFrame(
{"a": [[1], [1, 5, -1, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5], None]},
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,8 @@ def test_timezone() -> None:
# different timezones are not considered equal
# we check both `null_equal=True` and `null_equal=False`
# https://github.com/pola-rs/polars/issues/5023
assert not s.series_equal(tz_s, null_equal=False)
assert not s.series_equal(tz_s, null_equal=True)
assert s.equals(tz_s, null_equal=False) is False
assert s.equals(tz_s, null_equal=True) is False
assert_series_not_equal(tz_s, s)
assert_series_equal(s.cast(int), tz_s.cast(int))

Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/io/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,10 @@ def test_hive_partitioned_projection_pushdown(
parallel=parallel, # type: ignore[arg-type]
)

expect = q.collect().select("category")
actual = q.select("category").collect()
expected = q.collect().select("category")
result = q.select("category").collect()

assert expect.frame_equal(actual)
assert_frame_equal(result, expected)


@pytest.mark.write_disk()
Expand Down
10 changes: 4 additions & 6 deletions py-polars/tests/unit/namespaces/string/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,9 @@ def test_str_to_integer_df() -> None:
"hex": ["fa1e", "ff00", "cafe", "invalid", None],
}
)
out = df.with_columns(
[
pl.col("bin").str.to_integer(base=2, strict=False),
pl.col("hex").str.to_integer(base=16, strict=False),
]
result = df.with_columns(
pl.col("bin").str.to_integer(base=2, strict=False),
pl.col("hex").str.to_integer(base=16, strict=False),
)

expected = pl.DataFrame(
Expand All @@ -257,7 +255,7 @@ def test_str_to_integer_df() -> None:
"hex": [64030, 65280, 51966, None, None],
}
)
assert out.frame_equal(expected)
assert_frame_equal(result, expected)

with pytest.raises(pl.ComputeError):
df.with_columns(
Expand Down
Loading

0 comments on commit 1694283

Please sign in to comment.