Skip to content

Commit

Permalink
Make deprecation warnings into actual DeprecationWarning + fix warning…
Browse files Browse the repository at this point in the history
…s in test suite (#3158)
  • Loading branch information
zundertj committed Apr 16, 2022
1 parent c3d6f9a commit 527c80a
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 85 deletions.
5 changes: 3 additions & 2 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,8 +560,9 @@ def arrow_to_pydf(

if len(dictionary_cols) > 0:
df = pli.wrap_df(pydf)
for i, s in dictionary_cols.items():
df[s.name] = s
df = df.with_columns(
[pli.lit(s).alias(s.name) for s in dictionary_cols.values()]
)
df = df[names]
pydf = df._df

Expand Down
45 changes: 33 additions & 12 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,9 @@ def to_json(
.. deprecated:: 0.13.12
Please use `write_json`
"""
warnings.warn("'to_json' is deprecated. please use 'write_json'")
warnings.warn(
"'to_json' is deprecated. please use 'write_json'", DeprecationWarning
)
return self.write_json(
file, pretty, row_oriented, json_lines, to_string=to_string
)
Expand Down Expand Up @@ -1127,7 +1129,9 @@ def to_csv(
.. deprecated:: 0.13.12
Please use `write_csv`
"""
warnings.warn("'to_csv' is deprecated. please use 'write_csv'")
warnings.warn(
"'to_csv' is deprecated. please use 'write_csv'", DeprecationWarning
)
return self.write_csv(file, has_header, sep)

def write_avro(
Expand Down Expand Up @@ -1162,7 +1166,9 @@ def to_avro(
.. deprecated:: 0.13.12
Please use `write_avro`
"""
warnings.warn("'to_avro' is deprecated. please use 'write_avro'")
warnings.warn(
"'to_avro' is deprecated. please use 'write_avro'", DeprecationWarning
)
return self.write_avro(file, compression)

def write_ipc(
Expand Down Expand Up @@ -1199,7 +1205,9 @@ def to_ipc(
.. deprecated:: 0.13.12
Please use `write_ipc`
"""
warnings.warn("'to_ipc' is deprecated. please use 'write_ipc'")
warnings.warn(
"'to_ipc' is deprecated. please use 'write_ipc'", DeprecationWarning
)
return self.write_ipc(file, compression)

def to_dicts(self) -> List[Dict[str, Any]]:
Expand Down Expand Up @@ -1418,7 +1426,9 @@ def to_parquet(
.. deprecated:: 0.13.12
Please use `write_parquet`
"""
warnings.warn("'to_parquet' is deprecated. please use 'write_parquet'")
warnings.warn(
"'to_parquet' is deprecated. please use 'write_parquet'", DeprecationWarning
)
return self.write_parquet(file, compression, statistics, use_pyarrow, **kwargs)

def to_numpy(self) -> np.ndarray:
Expand Down Expand Up @@ -1517,7 +1527,10 @@ def __getattr__(self, item: Any) -> "PySeries":
if item.startswith("_"):
raise AttributeError(item)
try:
warnings.warn("accessing series as Attribute of a DataFrame is deprecated")
warnings.warn(
"accessing series as Attribute of a DataFrame is deprecated",
DeprecationWarning,
)
return pli.wrap_s(self._df.column(item))
except Exception:
raise AttributeError(item)
Expand Down Expand Up @@ -1734,7 +1747,8 @@ def __setitem__(
self, key: Union[str, List, Tuple[Any, Union[str, int]]], value: Any
) -> None:
warnings.warn(
"setting a DataFrame by indexing is deprecated; Consider using DataFrame.with_column"
"setting a DataFrame by indexing is deprecated; Consider using DataFrame.with_column",
DeprecationWarning,
)
# df["foo"] = series
if isinstance(key, str):
Expand Down Expand Up @@ -2279,13 +2293,17 @@ def sort(
)
if in_place:
warnings.warn(
"in-place sorting is deprecated; please use default sorting"
"in-place sorting is deprecated; please use default sorting",
DeprecationWarning,
)
self._df = df._df
return self
return df
if in_place:
warnings.warn("in-place sorting is deprecated; please use default sorting")
warnings.warn(
"in-place sorting is deprecated; please use default sorting",
DeprecationWarning,
)
self._df.sort_in_place(by, reverse)
return None
else:
Expand Down Expand Up @@ -3437,7 +3455,8 @@ def join(
"""
if how == "asof":
warnings.warn(
"using asof join via DataFrame.join is deprecated, please use DataFrame.join_asof"
"using asof join via DataFrame.join is deprecated, please use DataFrame.join_asof",
DeprecationWarning,
)
if how == "cross":
return self._from_pydf(self._df.join(df._df, [], [], how, suffix))
Expand Down Expand Up @@ -5385,7 +5404,8 @@ def _select(self, columns: Union[str, List[str]]) -> "GBSelection[DF]":
One or multiple columns.
"""
warnings.warn(
"accessing GroupBy by index is deprecated, consider using the `.agg` method"
"accessing GroupBy by index is deprecated, consider using the `.agg` method",
DeprecationWarning,
)
if isinstance(columns, str):
columns = [columns]
Expand Down Expand Up @@ -5472,7 +5492,8 @@ def groups(self) -> DF:
* the group indexes aggregated as lists
"""
warnings.warn(
"accessing GroupBy by index is deprecated, consider using the `.agg` method"
"accessing GroupBy by index is deprecated, consider using the `.agg` method",
DeprecationWarning,
)
return self._dataframe_class._from_pydf(
self._df.groupby(self.by, None, "groups")
Expand Down
3 changes: 2 additions & 1 deletion py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,8 @@ def join(
"""
if how == "asof":
warnings.warn(
"using asof join via LazyFrame.join is deprecated, please use LazyFrame.join_asof"
"using asof join via LazyFrame.join is deprecated, please use LazyFrame.join_asof",
DeprecationWarning,
)
if how == "cross":
return self._from_pyldf(
Expand Down
133 changes: 70 additions & 63 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,8 @@ def test_selection() -> None:
assert df[[0, 1], "b"].shape == (2, 1)
assert df[[2], ["a", "b"]].shape == (1, 2)
assert df.select_at_idx(0).name == "a"
assert (df.a == df["a"]).sum() == 3
assert (df.c == df["a"]).sum() == 0
assert (df["a"] == df["a"]).sum() == 3
assert (df["c"] == df["a"]).sum() == 0
assert df[:, "a":"b"].shape == (3, 2) # type: ignore
assert df[:, "a":"c"].columns == ["a", "b", "c"] # type: ignore
expect = pl.DataFrame({"c": ["b"]})
Expand Down Expand Up @@ -436,12 +436,14 @@ def test_from_arrow() -> None:

def test_sort() -> None:
df = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
df.sort("a", in_place=True)
with pytest.deprecated_call():
df.sort("a", in_place=True)
assert df.frame_equal(pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}))

# test in-place + passing a list
df = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
df.sort(["a", "b"], in_place=True)
with pytest.deprecated_call():
df.sort(["a", "b"], in_place=True)
assert df.frame_equal(pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}))


Expand All @@ -454,10 +456,12 @@ def test_replace() -> None:

def test_assignment() -> None:
df = pl.DataFrame({"foo": [1, 2, 3], "bar": [2, 3, 4]})
df["foo"] = df["foo"]
df = df.with_column(pl.col("foo").alias("foo"))
# make sure that assignment does not change column order
assert df.columns == ["foo", "bar"]
df[df["foo"] > 1, "foo"] = 9
df = df.with_column(
pl.when(pl.col("foo") > 1).then(9).otherwise(pl.col("foo")).alias("foo")
)
assert df["foo"].to_list() == [1, 9, 9]


Expand Down Expand Up @@ -550,17 +554,15 @@ def test_groupby() -> None:
# )
assert df.groupby("a").apply(lambda df: df[["c"]].sum()).sort("c")["c"][0] == 1

assert (
df.groupby("a")
.groups()
.sort("a")["a"]
.series_equal(pl.Series("a", ["a", "b", "c"]))
)
df_groups = df.groupby("a").groups().sort("a")
assert df_groups["a"].series_equal(pl.Series("a", ["a", "b", "c"]))

for subdf in df.groupby("a"): # type: ignore
# TODO: add __next__() to GroupBy
if subdf["a"][0] == "b":
assert subdf.shape == (3, 3)
with pytest.deprecated_call():
# TODO: find a way to avoid indexing into GroupBy
for subdf in df.groupby("a"): # type: ignore
# TODO: add __next__() to GroupBy
if subdf["a"][0] == "b":
assert subdf.shape == (3, 3)

assert df.groupby("a").get_group("c").shape == (1, 3)
assert df.groupby("a").get_group("b").shape == (3, 3)
Expand All @@ -583,9 +585,9 @@ def test_groupby() -> None:
df.groupby("b").agg(pl.col("c").forward_fill()).explode("c")

# get a specific column
result = df.groupby("b")["a"].count()
result = df.groupby("b").agg(pl.count("a"))
assert result.shape == (2, 2)
assert result.columns == ["b", "a_count"]
assert result.columns == ["b", "a"]

# make sure all the methods below run
assert df.groupby("b").first().shape == (2, 3)
Expand Down Expand Up @@ -838,47 +840,49 @@ def test_file_buffer() -> None:


def test_set() -> None:
np.random.seed(1)
df = pl.DataFrame(
{"foo": np.random.rand(10), "bar": np.arange(10), "ham": ["h"] * 10}
)
df["new"] = np.random.rand(10)
df[df["new"] > 0.5, "new"] = 1
"""Setting a dataframe using indices is deprecated. We keep these tests because we only generate a warning"""
with pytest.deprecated_call():
np.random.seed(1)
df = pl.DataFrame(
{"foo": np.random.rand(10), "bar": np.arange(10), "ham": ["h"] * 10}
)
df["new"] = np.random.rand(10)
df[df["new"] > 0.5, "new"] = 1

# set 2D
df = pl.DataFrame({"b": [0, 0]})
df[["A", "B"]] = [[1, 2], [1, 2]]
assert df["A"] == [1, 1]
assert df["B"] == [2, 2]
# set 2D
df = pl.DataFrame({"b": [0, 0]})
df[["A", "B"]] = [[1, 2], [1, 2]]
assert df["A"] == [1, 1]
assert df["B"] == [2, 2]

with pytest.raises(ValueError):
df[["C", "D"]] = 1
with pytest.raises(ValueError):
df[["C", "D"]] = [1, 1]
with pytest.raises(ValueError):
df[["C", "D"]] = [[1, 2, 3], [1, 2, 3]]
with pytest.raises(ValueError):
df[["C", "D"]] = 1
with pytest.raises(ValueError):
df[["C", "D"]] = [1, 1]
with pytest.raises(ValueError):
df[["C", "D"]] = [[1, 2, 3], [1, 2, 3]]

# set tuple
df = pl.DataFrame({"b": [0, 0]})
df[0, "b"] = 1
assert df[0, "b"] == 1
# set tuple
df = pl.DataFrame({"b": [0, 0]})
df[0, "b"] = 1
assert df[0, "b"] == 1

df[0, 0] = 2
assert df[0, "b"] == 2
df[0, 0] = 2
assert df[0, "b"] == 2

# row and col selection have to be int or str
with pytest.raises(ValueError):
df[:, [1]] = 1 # type: ignore
with pytest.raises(ValueError):
df[True, :] = 1 # type: ignore
# row and col selection have to be int or str
with pytest.raises(ValueError):
df[:, [1]] = 1 # type: ignore
with pytest.raises(ValueError):
df[True, :] = 1 # type: ignore

# needs to be a 2 element tuple
with pytest.raises(ValueError):
df[(1, 2, 3)] = 1 # type: ignore
# needs to be a 2 element tuple
with pytest.raises(ValueError):
df[(1, 2, 3)] = 1 # type: ignore

# we cannot index with any type, such as bool
with pytest.raises(NotImplementedError):
df[True] = 1 # type: ignore
# we cannot index with any type, such as bool
with pytest.raises(NotImplementedError):
df[True] = 1 # type: ignore


def test_melt() -> None:
Expand Down Expand Up @@ -1184,7 +1188,7 @@ def test_assign() -> None:
# check if can assign in case of a single column
df = pl.DataFrame({"a": [1, 2, 3]})
# test if we can assign in case of single column
df["a"] = df["a"] * 2
df = df.with_column(pl.col("a") * 2)
assert df["a"] == [2, 4, 6]


Expand Down Expand Up @@ -1234,7 +1238,7 @@ def test_rename(df: pl.DataFrame) -> None:
_ = out[["foos", "bars"]]


def test_to_csv() -> None:
def test_write_csv() -> None:
df = pl.DataFrame(
{
"foo": [1, 2, 3, 4, 5],
Expand All @@ -1244,13 +1248,13 @@ def test_to_csv() -> None:
)
expected = "foo,bar,ham\n1,6,a\n2,7,b\n3,8,c\n4,9,d\n5,10,e\n"

# if no file argument is supplied, to_csv() will return the string
s = df.to_csv()
# if no file argument is supplied, write_csv() will return the string
s = df.write_csv()
assert s == expected

# otherwise it will write to the file/iobuffer
file = BytesIO()
df.to_csv(file)
df.write_csv(file)
file.seek(0)
s = file.read().decode("utf8")
assert s == expected
Expand Down Expand Up @@ -1802,11 +1806,12 @@ def test_add_string() -> None:


def test_getattr() -> None:
df = pl.DataFrame({"a": [1.0, 2.0]})
testing.assert_series_equal(df.a, pl.Series("a", [1.0, 2.0]))
with pytest.deprecated_call():
df = pl.DataFrame({"a": [1.0, 2.0]})
testing.assert_series_equal(df.a, pl.Series("a", [1.0, 2.0]))

with pytest.raises(AttributeError):
_ = df.b
with pytest.raises(AttributeError):
_ = df.b


def test_get_item() -> None:
Expand Down Expand Up @@ -1990,7 +1995,7 @@ class MyDataFrame(pl.DataFrame):


def test_preservation_of_subclasses_after_groupby_statements() -> None:
"""Group by operations should preserve inherited datframe classes."""
"""Group by operations should preserve inherited dataframe classes."""

class SubClassedDataFrame(pl.DataFrame):
pass
Expand All @@ -2001,7 +2006,9 @@ class SubClassedDataFrame(pl.DataFrame):
assert isinstance(groupby.agg(pl.count()), SubClassedDataFrame)

# Round-trips to GBSelection and back should also preserve subclass
assert isinstance(groupby["a"].count(), SubClassedDataFrame)
assert isinstance(
groupby.agg(pl.col("a").count().alias("count")), SubClassedDataFrame
)

# Round-trips to PivotOps and back should also preserve subclass
assert isinstance(
Expand Down

0 comments on commit 527c80a

Please sign in to comment.