Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python): Rename series_equal/frame_equal to equals #12618

Merged
merged 4 commits into from
Nov 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Miscellaneous

DataFrame.apply
DataFrame.corr
DataFrame.equals
DataFrame.frame_equal
DataFrame.lazy
DataFrame.map_rows
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/miscellaneous.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Miscellaneous
:toctree: api/

Series.apply
Series.equals
Series.map_elements
Series.reinterpret
Series.series_equal
Expand Down
31 changes: 26 additions & 5 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4578,9 +4578,9 @@ def bottom_k(
)
)

def frame_equal(self, other: DataFrame, *, null_equal: bool = True) -> bool:
def equals(self, other: DataFrame, *, null_equal: bool = True) -> bool:
"""
Check if DataFrame is equal to other.
Check whether the DataFrame is equal to another DataFrame.

Parameters
----------
Expand All @@ -4589,6 +4589,10 @@ def frame_equal(self, other: DataFrame, *, null_equal: bool = True) -> bool:
null_equal
Consider null values as equal.

See Also
--------
assert_frame_equal

Examples
--------
>>> df1 = pl.DataFrame(
Expand All @@ -4605,13 +4609,13 @@ def frame_equal(self, other: DataFrame, *, null_equal: bool = True) -> bool:
... "ham": ["c", "b", "a"],
... }
... )
>>> df1.frame_equal(df1)
>>> df1.equals(df1)
True
>>> df1.frame_equal(df2)
>>> df1.equals(df2)
False

"""
return self._df.frame_equal(other._df, null_equal)
return self._df.equals(other._df, null_equal)

@deprecate_function(
"DataFrame.replace is deprecated and will be removed in a future version. "
Expand Down Expand Up @@ -10477,6 +10481,23 @@ def replace_at_idx(self, index: int, new_column: Series) -> Self:
"""
return self.replace_column(index, new_column)

@deprecate_renamed_function("equals", version="0.19.16")
def frame_equal(self, other: DataFrame, *, null_equal: bool = True) -> bool:
"""
Check whether the DataFrame is equal to another DataFrame.

.. deprecated:: 0.19.16
This method has been renamed to :meth:`equals`.

Parameters
----------
other
DataFrame to compare with.
null_equal
Consider null values as equal.
"""
return self.equals(other, null_equal=null_equal)


def _prepare_other_arg(other: Any, length: int | None = None) -> Series:
# if not a series create singleton series such that it will broadcast
Expand Down
6 changes: 3 additions & 3 deletions py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,9 @@ def truncate(
│ 2001-01-01 18:00:00 │
│ 2001-01-01 22:00:00 │
└─────────────────────┘
>>> df.select(pl.col("datetime").dt.truncate("1h")).frame_equal(
... df.select(pl.col("datetime").dt.truncate(timedelta(hours=1)))
... )
>>> truncate_str = df.select(pl.col("datetime").dt.truncate("1h"))
>>> truncate_td = df.select(pl.col("datetime").dt.truncate(timedelta(hours=1)))
>>> truncate_str.equals(truncate_td)
True

>>> df = pl.datetime_range(
Expand Down
4 changes: 3 additions & 1 deletion py-polars/polars/series/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,9 @@ def round(
2001-01-01 19:00:00
2001-01-01 22:00:00
]
>>> s.dt.round("1h").series_equal(s.dt.round(timedelta(hours=1)))
>>> round_str = s.dt.round("1h")
>>> round_td = s.dt.round(timedelta(hours=1))
>>> round_str.equals(round_td)
True

>>> start = datetime(2001, 1, 1)
Expand Down
39 changes: 32 additions & 7 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3737,11 +3737,11 @@ def explode(self) -> Series:

"""

def series_equal(
def equals(
self, other: Series, *, null_equal: bool = True, strict: bool = False
) -> bool:
"""
Check if series is equal with another Series.
Check whether the Series is equal to another Series.

Parameters
----------
Expand All @@ -3753,17 +3753,20 @@ def series_equal(
Don't allow different numerical dtypes, e.g. comparing `pl.UInt32` with a
`pl.Int64` will return `False`.

See Also
--------
assert_series_equal

Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s1 = pl.Series("a", [1, 2, 3])
>>> s2 = pl.Series("b", [4, 5, 6])
>>> s.series_equal(s)
>>> s1.equals(s1)
True
>>> s.series_equal(s2)
>>> s1.equals(s2)
False

"""
return self._s.series_equal(other._s, null_equal, strict)
return self._s.equals(other._s, null_equal, strict)

def len(self) -> int:
"""
Expand Down Expand Up @@ -7169,6 +7172,28 @@ def map_dict(
"""
return self.replace(mapping, default=default, return_dtype=return_dtype)

@deprecate_renamed_function("equals", version="0.19.16")
def series_equal(
self, other: Series, *, null_equal: bool = True, strict: bool = False
) -> bool:
"""
Check whether the Series is equal to another Series.

.. deprecated:: 0.19.16
This method has been renamed to :meth:`equals`.

Parameters
----------
other
Series to compare with.
null_equal
Consider null values as equal.
strict
Don't allow different numerical dtypes, e.g. comparing `pl.UInt32` with a
`pl.Int64` will return `False`.
"""
return self.equals(other, null_equal=null_equal, strict=strict)

# Keep the `list` and `str` properties below at the end of the definition of Series,
# so as not to confuse mypy with the type annotations `str` and `list`

Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1125,7 +1125,7 @@ impl PyDataFrame {
Ok(mask.into_series().into())
}

pub fn frame_equal(&self, other: &PyDataFrame, null_equal: bool) -> bool {
pub fn equals(&self, other: &PyDataFrame, null_equal: bool) -> bool {
if null_equal {
self.df.frame_equal_missing(&other.df)
} else {
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ impl PySeries {
self.series.has_validity()
}

fn series_equal(&self, other: &PySeries, null_equal: bool, strict: bool) -> bool {
fn equals(&self, other: &PySeries, null_equal: bool, strict: bool) -> bool {
if strict && (self.series.dtype() != other.series.dtype()) {
return false;
}
Expand Down
48 changes: 1 addition & 47 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ def test_to_dummies_drop_first() -> None:

assert dd.columns == ["foo_1", "foo_2", "bar_4", "bar_5", "baz_y", "baz_z"]
assert set(dm.columns) - set(dd.columns) == {"foo_0", "bar_3", "baz_x"}
assert dm.select(dd.columns).frame_equal(dd)
assert_frame_equal(dm.select(dd.columns), dd)
assert dd.rows() == [
(0, 0, 0, 0, 0, 0),
(1, 0, 1, 0, 1, 0),
Expand Down Expand Up @@ -3288,52 +3288,6 @@ def test_iter_slices() -> None:
assert batches[1].rows() == df[50:].rows()


def test_frame_equal() -> None:
# Values are checked
df1 = pl.DataFrame(
{
"foo": [1, 2, 3],
"bar": [6.0, 7.0, 8.0],
"ham": ["a", "b", "c"],
}
)
df2 = pl.DataFrame(
{
"foo": [3, 2, 1],
"bar": [8.0, 7.0, 6.0],
"ham": ["c", "b", "a"],
}
)

assert df1.frame_equal(df1)
assert not df1.frame_equal(df2)

# Column names are checked
df3 = pl.DataFrame(
{
"a": [1, 2, 3],
"b": [6.0, 7.0, 8.0],
"c": ["a", "b", "c"],
}
)
assert not df1.frame_equal(df3)

# Datatypes are NOT checked
df = pl.DataFrame(
{
"foo": [1, 2, None],
"bar": [6.0, 7.0, None],
"ham": ["a", "b", None],
}
)
assert df.frame_equal(df.with_columns(pl.col("foo").cast(pl.Int8)))
assert df.frame_equal(df.with_columns(pl.col("ham").cast(pl.Categorical)))

# The null_equal parameter determines if None values are considered equal
assert df.frame_equal(df)
assert not df.frame_equal(df, null_equal=False)


def test_format_empty_df() -> None:
df = pl.DataFrame(
[
Expand Down
47 changes: 47 additions & 0 deletions py-polars/tests/unit/dataframe/test_equals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import polars as pl


def test_equals() -> None:
# Values are checked
df1 = pl.DataFrame(
{
"foo": [1, 2, 3],
"bar": [6.0, 7.0, 8.0],
"ham": ["a", "b", "c"],
}
)
df2 = pl.DataFrame(
{
"foo": [3, 2, 1],
"bar": [8.0, 7.0, 6.0],
"ham": ["c", "b", "a"],
}
)

assert df1.equals(df1) is True
assert df1.equals(df2) is False

# Column names are checked
df3 = pl.DataFrame(
{
"a": [1, 2, 3],
"b": [6.0, 7.0, 8.0],
"c": ["a", "b", "c"],
}
)
assert df1.equals(df3) is False

# Datatypes are NOT checked
df = pl.DataFrame(
{
"foo": [1, 2, None],
"bar": [6.0, 7.0, None],
"ham": ["a", "b", None],
}
)
assert df.equals(df.with_columns(pl.col("foo").cast(pl.Int8))) is True
assert df.equals(df.with_columns(pl.col("ham").cast(pl.Categorical))) is True

# The null_equal parameter determines if None values are considered equal
assert df.equals(df) is True
assert df.equals(df, null_equal=False) is False
17 changes: 8 additions & 9 deletions py-polars/tests/unit/datatypes/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,19 +397,18 @@ def test_list_any() -> None:


def test_list_min_max() -> None:
for dt in pl.NUMERIC_DTYPES:
if dt == pl.Decimal:
continue
for dt in pl.INTEGER_DTYPES | pl.FLOAT_DTYPES:
df = pl.DataFrame(
{"a": [[1], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5]]},
schema={"a": pl.List(dt)},
)
assert df.select(pl.col("a").list.min())["a"].series_equal(
df.select(pl.col("a").list.first())["a"]
)
assert df.select(pl.col("a").list.max())["a"].series_equal(
df.select(pl.col("a").list.last())["a"]
)
result = df.select(pl.col("a").list.min())
expected = df.select(pl.col("a").list.first())
assert_frame_equal(result, expected)

result = df.select(pl.col("a").list.max())
expected = df.select(pl.col("a").list.last())
assert_frame_equal(result, expected)

df = pl.DataFrame(
{"a": [[1], [1, 5, -1, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5], None]},
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,8 @@ def test_timezone() -> None:
# different timezones are not considered equal
# we check both `null_equal=True` and `null_equal=False`
# https://github.com/pola-rs/polars/issues/5023
assert not s.series_equal(tz_s, null_equal=False)
assert not s.series_equal(tz_s, null_equal=True)
assert s.equals(tz_s, null_equal=False) is False
assert s.equals(tz_s, null_equal=True) is False
assert_series_not_equal(tz_s, s)
assert_series_equal(s.cast(int), tz_s.cast(int))

Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/io/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,10 @@ def test_hive_partitioned_projection_pushdown(
parallel=parallel, # type: ignore[arg-type]
)

expect = q.collect().select("category")
actual = q.select("category").collect()
expected = q.collect().select("category")
result = q.select("category").collect()

assert expect.frame_equal(actual)
assert_frame_equal(result, expected)


@pytest.mark.write_disk()
Expand Down
10 changes: 4 additions & 6 deletions py-polars/tests/unit/namespaces/string/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,9 @@ def test_str_to_integer_df() -> None:
"hex": ["fa1e", "ff00", "cafe", "invalid", None],
}
)
out = df.with_columns(
[
pl.col("bin").str.to_integer(base=2, strict=False),
pl.col("hex").str.to_integer(base=16, strict=False),
]
result = df.with_columns(
pl.col("bin").str.to_integer(base=2, strict=False),
pl.col("hex").str.to_integer(base=16, strict=False),
)

expected = pl.DataFrame(
Expand All @@ -257,7 +255,7 @@ def test_str_to_integer_df() -> None:
"hex": [64030, 65280, 51966, None, None],
}
)
assert out.frame_equal(expected)
assert_frame_equal(result, expected)

with pytest.raises(pl.ComputeError):
df.with_columns(
Expand Down
Loading