depr(python): Rename series/frame_equal to equals
stinodego committed Nov 22, 2023
1 parent 1603284 commit 171b610
Showing 17 changed files with 139 additions and 127 deletions.
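The gist of the change: `DataFrame.frame_equal` and `Series.series_equal` are renamed to `equals` on both classes, and the test suite switches to the `polars.testing` assertion helpers where that reads better. A minimal before/after sketch, not taken from this diff (the exact deprecation mechanics of the old names are not shown here):

```python
import polars as pl

df1 = pl.DataFrame({"a": [1, 2, 3]})
df2 = pl.DataFrame({"a": [1, 2, 3]})

# New name introduced by this commit.
assert df1.equals(df2)

# Old name; expected to keep working as a deprecated alias for a while
# after this change, before eventual removal.
assert df1.frame_equal(df2)

s1 = pl.Series("a", [1, 2, 3])
s2 = pl.Series("a", [1, 2, 3])
assert s1.equals(s2)        # new name
assert s1.series_equal(s2)  # deprecated alias
```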
6 changes: 3 additions & 3 deletions py-polars/polars/expr/datetime.py
@@ -144,9 +144,9 @@ def truncate(
│ 2001-01-01 18:00:00 │
│ 2001-01-01 22:00:00 │
└─────────────────────┘
>>> df.select(pl.col("datetime").dt.truncate("1h")).frame_equal(
... df.select(pl.col("datetime").dt.truncate(timedelta(hours=1)))
... )
>>> truncate_str = df.select(pl.col("datetime").dt.truncate("1h"))
>>> truncate_td = df.select(pl.col("datetime").dt.truncate(timedelta(hours=1)))
>>> truncate_str.equals(truncate_td)
True
>>> df = pl.datetime_range(
4 changes: 3 additions & 1 deletion py-polars/polars/series/datetime.py
@@ -1675,7 +1675,9 @@ def round(
2001-01-01 19:00:00
2001-01-01 22:00:00
]
>>> s.dt.round("1h").series_equal(s.dt.round(timedelta(hours=1)))
>>> round_str = s.dt.round("1h")
>>> round_td = s.dt.round(timedelta(hours=1))
>>> round_str.equals(round_td)
True
>>> start = datetime(2001, 1, 1)
48 changes: 1 addition & 47 deletions py-polars/tests/unit/dataframe/test_df.py
@@ -619,7 +619,7 @@ def test_to_dummies_drop_first() -> None:

assert dd.columns == ["foo_1", "foo_2", "bar_4", "bar_5", "baz_y", "baz_z"]
assert set(dm.columns) - set(dd.columns) == {"foo_0", "bar_3", "baz_x"}
assert dm.select(dd.columns).frame_equal(dd)
assert_frame_equal(dm.select(dd.columns), dd)
assert dd.rows() == [
(0, 0, 0, 0, 0, 0),
(1, 0, 1, 0, 1, 0),
@@ -3288,52 +3288,6 @@ def test_iter_slices() -> None:
assert batches[1].rows() == df[50:].rows()


def test_frame_equal() -> None:
# Values are checked
df1 = pl.DataFrame(
{
"foo": [1, 2, 3],
"bar": [6.0, 7.0, 8.0],
"ham": ["a", "b", "c"],
}
)
df2 = pl.DataFrame(
{
"foo": [3, 2, 1],
"bar": [8.0, 7.0, 6.0],
"ham": ["c", "b", "a"],
}
)

assert df1.frame_equal(df1)
assert not df1.frame_equal(df2)

# Column names are checked
df3 = pl.DataFrame(
{
"a": [1, 2, 3],
"b": [6.0, 7.0, 8.0],
"c": ["a", "b", "c"],
}
)
assert not df1.frame_equal(df3)

# Datatypes are NOT checked
df = pl.DataFrame(
{
"foo": [1, 2, None],
"bar": [6.0, 7.0, None],
"ham": ["a", "b", None],
}
)
assert df.frame_equal(df.with_columns(pl.col("foo").cast(pl.Int8)))
assert df.frame_equal(df.with_columns(pl.col("ham").cast(pl.Categorical)))

# The null_equal parameter determines if None values are considered equal
assert df.frame_equal(df)
assert not df.frame_equal(df, null_equal=False)


def test_format_empty_df() -> None:
df = pl.DataFrame(
[
47 changes: 47 additions & 0 deletions py-polars/tests/unit/dataframe/test_equals.py
@@ -0,0 +1,47 @@
import polars as pl


def test_equals() -> None:
# Values are checked
df1 = pl.DataFrame(
{
"foo": [1, 2, 3],
"bar": [6.0, 7.0, 8.0],
"ham": ["a", "b", "c"],
}
)
df2 = pl.DataFrame(
{
"foo": [3, 2, 1],
"bar": [8.0, 7.0, 6.0],
"ham": ["c", "b", "a"],
}
)

assert df1.equals(df1) is True
assert df1.equals(df2) is False

# Column names are checked
df3 = pl.DataFrame(
{
"a": [1, 2, 3],
"b": [6.0, 7.0, 8.0],
"c": ["a", "b", "c"],
}
)
assert df1.equals(df3) is False

# Datatypes are NOT checked
df = pl.DataFrame(
{
"foo": [1, 2, None],
"bar": [6.0, 7.0, None],
"ham": ["a", "b", None],
}
)
assert df.equals(df.with_columns(pl.col("foo").cast(pl.Int8))) is True
assert df.equals(df.with_columns(pl.col("ham").cast(pl.Categorical))) is True

# The null_equal parameter determines if None values are considered equal
assert df.equals(df) is True
assert df.equals(df, null_equal=False) is False
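As the new test spells out, `DataFrame.equals` compares values only: dtypes are not checked and nulls compare equal unless `null_equal=False`. The stricter `polars.testing.assert_frame_equal` helper, which the remaining test files below migrate to, also checks the schema by default. A hedged sketch of that contrast (assuming the helper's default dtype checking, as used throughout this commit):

```python
import polars as pl
from polars.testing import assert_frame_equal

df = pl.DataFrame({"foo": [1, 2, None]})
other = df.with_columns(pl.col("foo").cast(pl.Int8))

# equals() only compares values, so the Int64 vs Int8 difference passes.
assert df.equals(other) is True

# assert_frame_equal checks dtypes by default, so the same pair fails.
try:
    assert_frame_equal(df, other)
except AssertionError:
    print("schemas differ: Int64 vs Int8")
```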
17 changes: 8 additions & 9 deletions py-polars/tests/unit/datatypes/test_list.py
@@ -397,19 +397,18 @@ def test_list_any() -> None:


def test_list_min_max() -> None:
for dt in pl.NUMERIC_DTYPES:
if dt == pl.Decimal:
continue
for dt in pl.INTEGER_DTYPES | pl.FLOAT_DTYPES:
df = pl.DataFrame(
{"a": [[1], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5]]},
schema={"a": pl.List(dt)},
)
assert df.select(pl.col("a").list.min())["a"].series_equal(
df.select(pl.col("a").list.first())["a"]
)
assert df.select(pl.col("a").list.max())["a"].series_equal(
df.select(pl.col("a").list.last())["a"]
)
result = df.select(pl.col("a").list.min())
expected = df.select(pl.col("a").list.first())
assert_frame_equal(result, expected)

result = df.select(pl.col("a").list.max())
expected = df.select(pl.col("a").list.last())
assert_frame_equal(result, expected)

df = pl.DataFrame(
{"a": [[1], [1, 5, -1, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5], None]},
4 changes: 2 additions & 2 deletions py-polars/tests/unit/datatypes/test_temporal.py
@@ -427,8 +427,8 @@ def test_timezone() -> None:
# different timezones are not considered equal
# we check both `null_equal=True` and `null_equal=False`
# https://github.com/pola-rs/polars/issues/5023
assert not s.series_equal(tz_s, null_equal=False)
assert not s.series_equal(tz_s, null_equal=True)
assert s.equals(tz_s, null_equal=False) is False
assert s.equals(tz_s, null_equal=True) is False
assert_series_not_equal(tz_s, s)
assert_series_equal(s.cast(int), tz_s.cast(int))

6 changes: 3 additions & 3 deletions py-polars/tests/unit/io/test_hive.py
@@ -129,10 +129,10 @@ def test_hive_partitioned_projection_pushdown(
parallel=parallel, # type: ignore[arg-type]
)

expect = q.collect().select("category")
actual = q.select("category").collect()
expected = q.collect().select("category")
result = q.select("category").collect()

assert expect.frame_equal(actual)
assert_frame_equal(result, expected)


@pytest.mark.write_disk()
10 changes: 4 additions & 6 deletions py-polars/tests/unit/namespaces/string/test_string.py
@@ -244,11 +244,9 @@ def test_str_to_integer_df() -> None:
"hex": ["fa1e", "ff00", "cafe", "invalid", None],
}
)
out = df.with_columns(
[
pl.col("bin").str.to_integer(base=2, strict=False),
pl.col("hex").str.to_integer(base=16, strict=False),
]
result = df.with_columns(
pl.col("bin").str.to_integer(base=2, strict=False),
pl.col("hex").str.to_integer(base=16, strict=False),
)

expected = pl.DataFrame(
@@ -257,7 +255,7 @@
"hex": [64030, 65280, 51966, None, None],
}
)
assert out.frame_equal(expected)
assert_frame_equal(result, expected)

with pytest.raises(pl.ComputeError):
df.with_columns(
5 changes: 3 additions & 2 deletions py-polars/tests/unit/namespaces/test_binary.py
@@ -1,6 +1,7 @@
import pytest

import polars as pl
from polars.testing import assert_frame_equal
from polars.type_aliases import TransferEncoding


@@ -132,7 +133,7 @@ def test_compare_encode_between_lazy_and_eager_6814(encoding: TransferEncoding)
result_eager = df.select(expr)
dtype = result_eager["x"].dtype
result_lazy = df.lazy().select(expr).select(pl.col(dtype)).collect()
assert result_eager.frame_equal(result_lazy)
assert_frame_equal(result_eager, result_lazy)


@pytest.mark.parametrize(
@@ -148,4 +149,4 @@ def test_compare_decode_between_lazy_and_eager_6814(encoding: TransferEncoding)
result_eager = df.select(expr)
dtype = result_eager["x"].dtype
result_lazy = df.lazy().select(expr).select(pl.col(dtype)).collect()
assert result_eager.frame_equal(result_lazy)
assert_frame_equal(result_eager, result_lazy)
9 changes: 4 additions & 5 deletions py-polars/tests/unit/namespaces/test_list.py
@@ -492,11 +492,10 @@ def test_list_gather_logical_type() -> None:


def test_list_unique() -> None:
assert (
pl.Series([[1, 1, 2, 2, 3], [3, 3, 3, 2, 1, 2]])
.list.unique(maintain_order=True)
.series_equal(pl.Series([[1, 2, 3], [3, 2, 1]]))
)
s = pl.Series([[1, 1, 2, 2, 3], [3, 3, 3, 2, 1, 2]])
result = s.list.unique(maintain_order=True)
expected = pl.Series([[1, 2, 3], [3, 2, 1]])
assert_series_equal(result, expected)


def test_list_to_struct() -> None:
15 changes: 11 additions & 4 deletions py-polars/tests/unit/operations/test_group_by.py
@@ -807,18 +807,25 @@ def test_group_by_list_scalar_11749() -> None:

def test_group_by_with_expr_as_key() -> None:
gb = pl.select(x=1).group_by(pl.col("x").alias("key"))
assert gb.agg(pl.all().first()).frame_equal(gb.agg(pl.first("x")))
result = gb.agg(pl.all().first())
expected = gb.agg(pl.first("x"))
assert_frame_equal(result, expected)

# tests: 11766
assert gb.head(0).frame_equal(gb.agg(pl.col("x").head(0)).explode("x"))
assert gb.tail(0).frame_equal(gb.agg(pl.col("x").tail(0)).explode("x"))
result = gb.head(0)
expected = gb.agg(pl.col("x").head(0)).explode("x")
assert_frame_equal(result, expected)

result = gb.tail(0)
expected = gb.agg(pl.col("x").tail(0)).explode("x")
assert_frame_equal(result, expected)


def test_lazy_group_by_reuse_11767() -> None:
lgb = pl.select(x=1).lazy().group_by("x")
a = lgb.count()
b = lgb.count()
assert a.collect().frame_equal(b.collect())
assert_frame_equal(a, b)


def test_group_by_double_on_empty_12194() -> None:
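One detail in the `test_lazy_group_by_reuse_11767` change above: the old code collected both lazy results before calling `frame_equal`, while the migrated assertion hands the `LazyFrame`s straight to `assert_frame_equal`. A small sketch of that pattern (assuming, as the helper's documentation describes, that lazy inputs are collected before comparison):

```python
import polars as pl
from polars.testing import assert_frame_equal

lf1 = pl.LazyFrame({"x": [1, 2, 3]}).filter(pl.col("x") > 1)
lf2 = pl.LazyFrame({"x": [2, 3]})

# Both inputs are lazy; the helper materializes them itself,
# so no explicit .collect() is needed.
assert_frame_equal(lf1, lf2)
```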
27 changes: 27 additions & 0 deletions py-polars/tests/unit/series/test_equals.py
@@ -0,0 +1,27 @@
from datetime import datetime

import polars as pl


def test_equals() -> None:
s1 = pl.Series("a", [1.0, 2.0, None], pl.Float64)
s2 = pl.Series("a", [1, 2, None], pl.Int64)

assert s1.equals(s2) is True
assert s1.equals(s2, strict=True) is False
assert s1.equals(s2, null_equal=False) is False

df = pl.DataFrame(
{"dtm": [datetime(2222, 2, 22, 22, 22, 22)]},
schema_overrides={"dtm": pl.Datetime(time_zone="UTC")},
).with_columns(
s3=pl.col("dtm").dt.convert_time_zone("Europe/London"),
s4=pl.col("dtm").dt.convert_time_zone("Asia/Tokyo"),
)
s3 = df["s3"].rename("b")
s4 = df["s4"].rename("b")

assert s3.equals(s4) is False
assert s3.equals(s4, strict=True) is False
assert s3.equals(s4, null_equal=False) is False
assert s3.dt.convert_time_zone("Asia/Tokyo").equals(s4) is True
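The timezone assertions above mirror the behaviour flagged in pola-rs/polars#5023 earlier in this diff: series carrying different time zones are not considered equal, even when they describe the same instants. A hedged illustration, not taken from the commit:

```python
from datetime import datetime

import polars as pl

s_utc = pl.Series("t", [datetime(2023, 1, 1)]).dt.replace_time_zone("UTC")
s_tokyo = s_utc.dt.convert_time_zone("Asia/Tokyo")

# Different time zones compare unequal, even though the underlying
# instants are identical.
assert s_utc.equals(s_tokyo) is False

# Converting back to a common time zone restores equality.
assert s_utc.equals(s_tokyo.dt.convert_time_zone("UTC")) is True
```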
24 changes: 0 additions & 24 deletions py-polars/tests/unit/series/test_series.py
@@ -254,30 +254,6 @@ def test_concat() -> None:
assert s.len() == 3


def test_equal() -> None:
s1 = pl.Series("a", [1.0, 2.0, None], Float64)
s2 = pl.Series("a", [1, 2, None], Int64)

assert s1.series_equal(s2) is True
assert s1.series_equal(s2, strict=True) is False
assert s1.series_equal(s2, null_equal=False) is False

df = pl.DataFrame(
{"dtm": [datetime(2222, 2, 22, 22, 22, 22)]},
schema_overrides={"dtm": Datetime(time_zone="UTC")},
).with_columns(
s3=pl.col("dtm").dt.convert_time_zone("Europe/London"),
s4=pl.col("dtm").dt.convert_time_zone("Asia/Tokyo"),
)
s3 = df["s3"].rename("b")
s4 = df["s4"].rename("b")

assert s3.series_equal(s4) is False
assert s3.series_equal(s4, strict=True) is False
assert s3.series_equal(s4, null_equal=False) is False
assert s3.dt.convert_time_zone("Asia/Tokyo").series_equal(s4) is True


@pytest.mark.parametrize(
"dtype",
[pl.Int64, pl.Float64, pl.Utf8, pl.Boolean],
18 changes: 9 additions & 9 deletions py-polars/tests/unit/sql/test_sql.py
@@ -1186,10 +1186,11 @@ def test_sql_expr() -> None:
"SUBSTR(b,1,2) AS b2",
]
)
result = df.select(*sql_exprs)
expected = pl.DataFrame(
{"a": [1, 1, 1], "aa": [1, 4, 27], "b2": ["yz", "bc", None]}
{"a": [1, 1, 1], "aa": [1.0, 4.0, 27.0], "b2": ["yz", "bc", None]}
)
assert df.select(*sql_exprs).frame_equal(expected)
assert_frame_equal(result, expected)

# expect expressions that can't reasonably be parsed as expressions to raise
# (for example: those that explicitly reference tables and/or use wildcards)
@@ -1249,12 +1250,11 @@ def test_sql_date() -> None:
)

with pl.SQLContext(df=df, eager_execution=True) as ctx:
expected = pl.DataFrame({"date": [True, False, False]})
assert ctx.execute("SELECT date < DATE('2021-03-20') from df").frame_equal(
expected
)
result = ctx.execute("SELECT date < DATE('2021-03-20') from df")

expected = pl.DataFrame({"date": [True, False, False]})
assert_frame_equal(result, expected)

result = pl.select(pl.sql_expr("""CAST(DATE('2023-03', '%Y-%m') as STRING)"""))
expected = pl.DataFrame({"literal": ["2023-03-01"]})
assert pl.select(
pl.sql_expr("""CAST(DATE('2023-03', '%Y-%m') as STRING)""")
).frame_equal(expected)
assert_frame_equal(result, expected)
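Note that the expected `"aa"` values in `test_sql_expr` change from ints to floats as part of this migration, presumably because `assert_frame_equal` also compares dtypes, so the expected frame now has to carry the float dtype the SQL expression actually produces, whereas the old `frame_equal` check compared values only.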
8 changes: 4 additions & 4 deletions py-polars/tests/unit/test_empty.py
@@ -28,10 +28,10 @@ def test_empty_cross_join() -> None:

def test_empty_string_replace() -> None:
s = pl.Series("", [], dtype=pl.Utf8)
assert s.str.replace("a", "b", literal=True).series_equal(s)
assert s.str.replace("a", "b").series_equal(s)
assert s.str.replace("ab", "b", literal=True).series_equal(s)
assert s.str.replace("ab", "b").series_equal(s)
assert_series_equal(s.str.replace("a", "b", literal=True), s)
assert_series_equal(s.str.replace("a", "b"), s)
assert_series_equal(s.str.replace("ab", "b", literal=True), s)
assert_series_equal(s.str.replace("ab", "b"), s)


def test_empty_window_function() -> None: