Skip to content

Commit

Permalink
Make deprecation warnings into actual DeprecationWarning + fix warning…
Browse files Browse the repository at this point in the history
…s in test suite (#3158)
  • Loading branch information
zundertj committed Apr 16, 2022
1 parent c3d6f9a commit 527c80a
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 85 deletions.
5 changes: 3 additions & 2 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,8 +560,9 @@ def arrow_to_pydf(

if len(dictionary_cols) > 0:
df = pli.wrap_df(pydf)
for i, s in dictionary_cols.items():
df[s.name] = s
df = df.with_columns(
[pli.lit(s).alias(s.name) for s in dictionary_cols.values()]
)
df = df[names]
pydf = df._df

Expand Down
45 changes: 33 additions & 12 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,9 @@ def to_json(
.. deprecated:: 0.13.12
Please use `write_json`
"""
warnings.warn("'to_json' is deprecated. please use 'write_json'")
warnings.warn(
"'to_json' is deprecated. please use 'write_json'", DeprecationWarning
)
return self.write_json(
file, pretty, row_oriented, json_lines, to_string=to_string
)
Expand Down Expand Up @@ -1127,7 +1129,9 @@ def to_csv(
.. deprecated:: 0.13.12
Please use `write_csv`
"""
warnings.warn("'to_csv' is deprecated. please use 'write_csv'")
warnings.warn(
"'to_csv' is deprecated. please use 'write_csv'", DeprecationWarning
)
return self.write_csv(file, has_header, sep)

def write_avro(
Expand Down Expand Up @@ -1162,7 +1166,9 @@ def to_avro(
.. deprecated:: 0.13.12
Please use `write_avro`
"""
warnings.warn("'to_avro' is deprecated. please use 'write_avro'")
warnings.warn(
"'to_avro' is deprecated. please use 'write_avro'", DeprecationWarning
)
return self.write_avro(file, compression)

def write_ipc(
Expand Down Expand Up @@ -1199,7 +1205,9 @@ def to_ipc(
.. deprecated:: 0.13.12
Please use `write_ipc`
"""
warnings.warn("'to_ipc' is deprecated. please use 'write_ipc'")
warnings.warn(
"'to_ipc' is deprecated. please use 'write_ipc'", DeprecationWarning
)
return self.write_ipc(file, compression)

def to_dicts(self) -> List[Dict[str, Any]]:
Expand Down Expand Up @@ -1418,7 +1426,9 @@ def to_parquet(
.. deprecated:: 0.13.12
Please use `write_parquet`
"""
warnings.warn("'to_parquet' is deprecated. please use 'write_parquet'")
warnings.warn(
"'to_parquet' is deprecated. please use 'write_parquet'", DeprecationWarning
)
return self.write_parquet(file, compression, statistics, use_pyarrow, **kwargs)

def to_numpy(self) -> np.ndarray:
Expand Down Expand Up @@ -1517,7 +1527,10 @@ def __getattr__(self, item: Any) -> "PySeries":
if item.startswith("_"):
raise AttributeError(item)
try:
warnings.warn("accessing series as Attribute of a DataFrame is deprecated")
warnings.warn(
"accessing series as Attribute of a DataFrame is deprecated",
DeprecationWarning,
)
return pli.wrap_s(self._df.column(item))
except Exception:
raise AttributeError(item)
Expand Down Expand Up @@ -1734,7 +1747,8 @@ def __setitem__(
self, key: Union[str, List, Tuple[Any, Union[str, int]]], value: Any
) -> None:
warnings.warn(
"setting a DataFrame by indexing is deprecated; Consider using DataFrame.with_column"
"setting a DataFrame by indexing is deprecated; Consider using DataFrame.with_column",
DeprecationWarning,
)
# df["foo"] = series
if isinstance(key, str):
Expand Down Expand Up @@ -2279,13 +2293,17 @@ def sort(
)
if in_place:
warnings.warn(
"in-place sorting is deprecated; please use default sorting"
"in-place sorting is deprecated; please use default sorting",
DeprecationWarning,
)
self._df = df._df
return self
return df
if in_place:
warnings.warn("in-place sorting is deprecated; please use default sorting")
warnings.warn(
"in-place sorting is deprecated; please use default sorting",
DeprecationWarning,
)
self._df.sort_in_place(by, reverse)
return None
else:
Expand Down Expand Up @@ -3437,7 +3455,8 @@ def join(
"""
if how == "asof":
warnings.warn(
"using asof join via DataFrame.join is deprecated, please use DataFrame.join_asof"
"using asof join via DataFrame.join is deprecated, please use DataFrame.join_asof",
DeprecationWarning,
)
if how == "cross":
return self._from_pydf(self._df.join(df._df, [], [], how, suffix))
Expand Down Expand Up @@ -5385,7 +5404,8 @@ def _select(self, columns: Union[str, List[str]]) -> "GBSelection[DF]":
One or multiple columns.
"""
warnings.warn(
"accessing GroupBy by index is deprecated, consider using the `.agg` method"
"accessing GroupBy by index is deprecated, consider using the `.agg` method",
DeprecationWarning,
)
if isinstance(columns, str):
columns = [columns]
Expand Down Expand Up @@ -5472,7 +5492,8 @@ def groups(self) -> DF:
* the group indexes aggregated as lists
"""
warnings.warn(
"accessing GroupBy by index is deprecated, consider using the `.agg` method"
"accessing GroupBy by index is deprecated, consider using the `.agg` method",
DeprecationWarning,
)
return self._dataframe_class._from_pydf(
self._df.groupby(self.by, None, "groups")
Expand Down
3 changes: 2 additions & 1 deletion py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,8 @@ def join(
"""
if how == "asof":
warnings.warn(
"using asof join via LazyFrame.join is deprecated, please use LazyFrame.join_asof"
"using asof join via LazyFrame.join is deprecated, please use LazyFrame.join_asof",
DeprecationWarning,
)
if how == "cross":
return self._from_pyldf(
Expand Down
133 changes: 70 additions & 63 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,8 @@ def test_selection() -> None:
assert df[[0, 1], "b"].shape == (2, 1)
assert df[[2], ["a", "b"]].shape == (1, 2)
assert df.select_at_idx(0).name == "a"
assert (df.a == df["a"]).sum() == 3
assert (df.c == df["a"]).sum() == 0
assert (df["a"] == df["a"]).sum() == 3
assert (df["c"] == df["a"]).sum() == 0
assert df[:, "a":"b"].shape == (3, 2) # type: ignore
assert df[:, "a":"c"].columns == ["a", "b", "c"] # type: ignore
expect = pl.DataFrame({"c": ["b"]})
Expand Down Expand Up @@ -436,12 +436,14 @@ def test_from_arrow() -> None:

def test_sort() -> None:
df = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
df.sort("a", in_place=True)
with pytest.deprecated_call():
df.sort("a", in_place=True)
assert df.frame_equal(pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}))

# test in-place + passing a list
df = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]})
df.sort(["a", "b"], in_place=True)
with pytest.deprecated_call():
df.sort(["a", "b"], in_place=True)
assert df.frame_equal(pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}))


Expand All @@ -454,10 +456,12 @@ def test_replace() -> None:

def test_assignment() -> None:
df = pl.DataFrame({"foo": [1, 2, 3], "bar": [2, 3, 4]})
df["foo"] = df["foo"]
df = df.with_column(pl.col("foo").alias("foo"))
# make sure that assignment does not change column order
assert df.columns == ["foo", "bar"]
df[df["foo"] > 1, "foo"] = 9
df = df.with_column(
pl.when(pl.col("foo") > 1).then(9).otherwise(pl.col("foo")).alias("foo")
)
assert df["foo"].to_list() == [1, 9, 9]


Expand Down Expand Up @@ -550,17 +554,15 @@ def test_groupby() -> None:
# )
assert df.groupby("a").apply(lambda df: df[["c"]].sum()).sort("c")["c"][0] == 1

assert (
df.groupby("a")
.groups()
.sort("a")["a"]
.series_equal(pl.Series("a", ["a", "b", "c"]))
)
df_groups = df.groupby("a").groups().sort("a")
assert df_groups["a"].series_equal(pl.Series("a", ["a", "b", "c"]))

for subdf in df.groupby("a"): # type: ignore
# TODO: add __next__() to GroupBy
if subdf["a"][0] == "b":
assert subdf.shape == (3, 3)
with pytest.deprecated_call():
# TODO: find a way to avoid indexing into GroupBy
for subdf in df.groupby("a"): # type: ignore
# TODO: add __next__() to GroupBy
if subdf["a"][0] == "b":
assert subdf.shape == (3, 3)

assert df.groupby("a").get_group("c").shape == (1, 3)
assert df.groupby("a").get_group("b").shape == (3, 3)
Expand All @@ -583,9 +585,9 @@ def test_groupby() -> None:
df.groupby("b").agg(pl.col("c").forward_fill()).explode("c")

# get a specific column
result = df.groupby("b")["a"].count()
result = df.groupby("b").agg(pl.count("a"))
assert result.shape == (2, 2)
assert result.columns == ["b", "a_count"]
assert result.columns == ["b", "a"]

# make sure all the methods below run
assert df.groupby("b").first().shape == (2, 3)
Expand Down Expand Up @@ -838,47 +840,49 @@ def test_file_buffer() -> None:


def test_set() -> None:
np.random.seed(1)
df = pl.DataFrame(
{"foo": np.random.rand(10), "bar": np.arange(10), "ham": ["h"] * 10}
)
df["new"] = np.random.rand(10)
df[df["new"] > 0.5, "new"] = 1
"""Setting a dataframe using indices is deprecated. We keep these tests because we only generate a warning"""
with pytest.deprecated_call():
np.random.seed(1)
df = pl.DataFrame(
{"foo": np.random.rand(10), "bar": np.arange(10), "ham": ["h"] * 10}
)
df["new"] = np.random.rand(10)
df[df["new"] > 0.5, "new"] = 1

# set 2D
df = pl.DataFrame({"b": [0, 0]})
df[["A", "B"]] = [[1, 2], [1, 2]]
assert df["A"] == [1, 1]
assert df["B"] == [2, 2]
# set 2D
df = pl.DataFrame({"b": [0, 0]})
df[["A", "B"]] = [[1, 2], [1, 2]]
assert df["A"] == [1, 1]
assert df["B"] == [2, 2]

with pytest.raises(ValueError):
df[["C", "D"]] = 1
with pytest.raises(ValueError):
df[["C", "D"]] = [1, 1]
with pytest.raises(ValueError):
df[["C", "D"]] = [[1, 2, 3], [1, 2, 3]]
with pytest.raises(ValueError):
df[["C", "D"]] = 1
with pytest.raises(ValueError):
df[["C", "D"]] = [1, 1]
with pytest.raises(ValueError):
df[["C", "D"]] = [[1, 2, 3], [1, 2, 3]]

# set tuple
df = pl.DataFrame({"b": [0, 0]})
df[0, "b"] = 1
assert df[0, "b"] == 1
# set tuple
df = pl.DataFrame({"b": [0, 0]})
df[0, "b"] = 1
assert df[0, "b"] == 1

df[0, 0] = 2
assert df[0, "b"] == 2
df[0, 0] = 2
assert df[0, "b"] == 2

# row and col selection have to be int or str
with pytest.raises(ValueError):
df[:, [1]] = 1 # type: ignore
with pytest.raises(ValueError):
df[True, :] = 1 # type: ignore
# row and col selection have to be int or str
with pytest.raises(ValueError):
df[:, [1]] = 1 # type: ignore
with pytest.raises(ValueError):
df[True, :] = 1 # type: ignore

# needs to be a 2 element tuple
with pytest.raises(ValueError):
df[(1, 2, 3)] = 1 # type: ignore
# needs to be a 2 element tuple
with pytest.raises(ValueError):
df[(1, 2, 3)] = 1 # type: ignore

# we cannot index with any type, such as bool
with pytest.raises(NotImplementedError):
df[True] = 1 # type: ignore
# we cannot index with any type, such as bool
with pytest.raises(NotImplementedError):
df[True] = 1 # type: ignore


def test_melt() -> None:
Expand Down Expand Up @@ -1184,7 +1188,7 @@ def test_assign() -> None:
# check if can assign in case of a single column
df = pl.DataFrame({"a": [1, 2, 3]})
# test if we can assign in case of single column
df["a"] = df["a"] * 2
df = df.with_column(pl.col("a") * 2)
assert df["a"] == [2, 4, 6]


Expand Down Expand Up @@ -1234,7 +1238,7 @@ def test_rename(df: pl.DataFrame) -> None:
_ = out[["foos", "bars"]]


def test_to_csv() -> None:
def test_write_csv() -> None:
df = pl.DataFrame(
{
"foo": [1, 2, 3, 4, 5],
Expand All @@ -1244,13 +1248,13 @@ def test_to_csv() -> None:
)
expected = "foo,bar,ham\n1,6,a\n2,7,b\n3,8,c\n4,9,d\n5,10,e\n"

# if no file argument is supplied, to_csv() will return the string
s = df.to_csv()
# if no file argument is supplied, write_csv() will return the string
s = df.write_csv()
assert s == expected

# otherwise it will write to the file/iobuffer
file = BytesIO()
df.to_csv(file)
df.write_csv(file)
file.seek(0)
s = file.read().decode("utf8")
assert s == expected
Expand Down Expand Up @@ -1802,11 +1806,12 @@ def test_add_string() -> None:


def test_getattr() -> None:
df = pl.DataFrame({"a": [1.0, 2.0]})
testing.assert_series_equal(df.a, pl.Series("a", [1.0, 2.0]))
with pytest.deprecated_call():
df = pl.DataFrame({"a": [1.0, 2.0]})
testing.assert_series_equal(df.a, pl.Series("a", [1.0, 2.0]))

with pytest.raises(AttributeError):
_ = df.b
with pytest.raises(AttributeError):
_ = df.b


def test_get_item() -> None:
Expand Down Expand Up @@ -1990,7 +1995,7 @@ class MyDataFrame(pl.DataFrame):


def test_preservation_of_subclasses_after_groupby_statements() -> None:
"""Group by operations should preserve inherited datframe classes."""
"""Group by operations should preserve inherited dataframe classes."""

class SubClassedDataFrame(pl.DataFrame):
pass
Expand All @@ -2001,7 +2006,9 @@ class SubClassedDataFrame(pl.DataFrame):
assert isinstance(groupby.agg(pl.count()), SubClassedDataFrame)

# Round-trips to GBSelection and back should also preserve subclass
assert isinstance(groupby["a"].count(), SubClassedDataFrame)
assert isinstance(
groupby.agg(pl.col("a").count().alias("count")), SubClassedDataFrame
)

# Round-trips to PivotOps and back should also preserve subclass
assert isinstance(
Expand Down

0 comments on commit 527c80a

Please sign in to comment.