fix[python]: raise DeprecationWarning on deprecated arguments (#4470)
matteosantama committed Aug 18, 2022
1 parent 200c6ed commit 5ee5da3
Showing 2 changed files with 24 additions and 50 deletions.
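
The change replaces the per-function `kwargs.pop(...)` plumbing with a single `@deprecated_alias` decorator imported from `polars.utils`. The decorator's implementation is not part of this diff; a minimal sketch of how such a decorator can remap legacy keyword names and emit a `DeprecationWarning` (names and message text here are assumptions, not the shipped code) might look like:

import warnings
from functools import wraps
from typing import Any, Callable, TypeVar

T = TypeVar("T")


def deprecated_alias(**aliases: str) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Map deprecated keyword arguments onto their current names."""

    def deco(fn: Callable[..., T]) -> Callable[..., T]:
        @wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            for old, new in aliases.items():
                if old in kwargs:
                    # Warn, then forward the value under the new name.
                    warnings.warn(
                        f"`{old}` is deprecated as an argument to "
                        f"`{fn.__name__}`; use `{new}` instead.",
                        DeprecationWarning,
                        stacklevel=2,
                    )
                    kwargs[new] = kwargs.pop(old)
            return fn(*args, **kwargs)

        return wrapper

    return deco

With this in place a call such as `pl.read_csv(..., has_headers=False)` keeps working but warns callers to switch to `has_header`.
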
70 changes: 22 additions & 48 deletions py-polars/polars/io.py
@@ -3,9 +3,9 @@
 
 from io import BytesIO, IOBase, StringIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Mapping, TextIO
+from typing import TYPE_CHECKING, BinaryIO, Callable, Mapping, TextIO
 
-from polars.utils import format_path, handle_projection_columns
+from polars.utils import deprecated_alias, format_path, handle_projection_columns
 
 try:
     import pyarrow as pa
@@ -63,6 +63,12 @@ def _update_columns(df: DataFrame, new_columns: list[str]) -> DataFrame:
     return df
 
 
+@deprecated_alias(
+    has_headers="has_header",
+    dtype="dtypes",
+    stop_after_n_rows="n_rows",
+    projection="columns",
+)
 def read_csv(
     file: str | TextIO | BytesIO | Path | BinaryIO | bytes,
     has_header: bool = True,
Expand Down Expand Up @@ -90,7 +96,6 @@ def read_csv(
row_count_offset: int = 0,
sample_size: int = 1024,
eol_char: str = "\n",
**kwargs: Any,
) -> DataFrame:
"""
Read a CSV file into a DataFrame.
@@ -200,14 +205,6 @@ def read_csv(
     scan_csv : Lazily read from a CSV file or multiple files via glob patterns.
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    has_header = kwargs.pop("has_headers", has_header)
-    dtypes = kwargs.pop("dtype", dtypes)
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
-    if columns is None:
-        columns = kwargs.pop("projection", None)
-
     _check_arg_is_1byte("sep", sep, False)
     _check_arg_is_1byte("comment_char", comment_char, False)
     _check_arg_is_1byte("quote_char", quote_char, True)
@@ -415,6 +412,7 @@ def read_csv(
     return df
 
 
+@deprecated_alias(has_headers="has_header", dtype="dtypes", stop_after_n_rows="n_rows")
 def scan_csv(
     file: str | Path,
     has_header: bool = True,
@@ -437,7 +435,6 @@ def scan_csv(
     row_count_offset: int = 0,
     parse_dates: bool = False,
     eol_char: str = "\n",
-    **kwargs: Any,
 ) -> LazyFrame:
     """
     Lazily read from a CSV file or multiple files via glob patterns.
@@ -560,11 +557,6 @@
     └─────────┴──────────┘
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    has_header = kwargs.pop("has_headers", has_header)
-    dtypes = kwargs.pop("dtype", dtypes)
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
     _check_arg_is_1byte("sep", sep, False)
     _check_arg_is_1byte("comment_char", comment_char, False)
     _check_arg_is_1byte("quote_char", quote_char, True)
@@ -597,6 +589,7 @@
     )
 
 
+@deprecated_alias(stop_after_n_rows="n_rows")
 def scan_ipc(
     file: str | Path,
     n_rows: int | None = None,
@@ -606,7 +599,6 @@
     row_count_offset: int = 0,
     storage_options: dict[str, object] | None = None,
     memory_map: bool = True,
-    **kwargs: Any,
 ) -> LazyFrame:
     """
     Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.
@@ -639,9 +631,6 @@
         Only uncompressed IPC files can be memory mapped.
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
     return LazyFrame.scan_ipc(
         file=file,
         n_rows=n_rows,
@@ -654,6 +643,7 @@
     )
 
 
+@deprecated_alias(stop_after_n_rows="n_rows")
 def scan_parquet(
     file: str | Path,
     n_rows: int | None = None,
@@ -664,7 +654,6 @@
     row_count_offset: int = 0,
     storage_options: dict[str, object] | None = None,
     low_memory: bool = False,
-    **kwargs: Any,
 ) -> LazyFrame:
     """
     Lazily read from a parquet file or multiple files via glob patterns.
@@ -699,9 +688,6 @@
         Reduce memory pressure at the expense of performance.
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
     if isinstance(file, (str, Path)):
         file = format_path(file)
 
@@ -718,11 +704,11 @@
     )
 
 
+@deprecated_alias(projection="columns")
 def read_avro(
     file: str | Path | BytesIO | BinaryIO,
     columns: list[int] | list[str] | None = None,
     n_rows: int | None = None,
-    **kwargs: Any,
 ) -> DataFrame:
     """
     Read into a DataFrame from Apache Avro format.
@@ -744,12 +730,11 @@
     """
     if isinstance(file, (str, Path)):
         file = format_path(file)
-    if columns is None:
-        columns = kwargs.pop("projection", None)
 
     return DataFrame._read_avro(file, n_rows=n_rows, columns=columns)
 
 
+@deprecated_alias(stop_after_n_rows="n_rows", projection="columns")
 def read_ipc(
     file: str | BinaryIO | BytesIO | Path | bytes,
     columns: list[int] | list[str] | None = None,
@@ -760,7 +745,6 @@
     row_count_name: str | None = None,
     row_count_offset: int = 0,
     rechunk: bool = True,
-    **kwargs: Any,
 ) -> DataFrame:
     """
     Read into a DataFrame from Arrow IPC (Feather v2) file.
@@ -798,12 +782,6 @@
     DataFrame
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
-    if columns is None:
-        columns = kwargs.pop("projection", None)
-
     if use_pyarrow:
         if n_rows and not memory_map:
             raise ValueError(
@@ -839,6 +817,7 @@
     )
 
 
+@deprecated_alias(stop_after_n_rows="n_rows", projection="columns")
 def read_parquet(
     source: str | Path | BinaryIO | BytesIO | bytes,
     columns: list[int] | list[str] | None = None,
@@ -850,7 +829,7 @@ def read_parquet(
     row_count_name: str | None = None,
     row_count_offset: int = 0,
     low_memory: bool = False,
-    **kwargs: Any,
+    pyarrow_options: dict[str, object] | None = None,
 ) -> DataFrame:
     """
     Read into a DataFrame from a parquet file.
@@ -886,26 +865,21 @@
         Offset to start the row_count column (only use if the name is set).
     low_memory
         Reduce memory pressure at the expense of performance.
-    **kwargs
-        kwargs for `pyarrow.parquet.read_table
+    pyarrow_options
+        Keyword arguments for `pyarrow.parquet.read_table
         <https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html>`_.
     Returns
     -------
     DataFrame
     """  # noqa: E501
-    # Map legacy arguments to current ones and remove them from kwargs.
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
-    if columns is None:
-        columns = kwargs.pop("projection", None)
-
-    if use_pyarrow:
-        if n_rows:
-            raise ValueError("``n_rows`` cannot be used with ``use_pyarrow=True``.")
+    if use_pyarrow and n_rows:
+        raise ValueError("``n_rows`` cannot be used with ``use_pyarrow=True``.")
 
     storage_options = storage_options or {}
+    pyarrow_options = pyarrow_options or {}
 
     with _prepare_file_arg(
         source, use_pyarrow=use_pyarrow, **storage_options
     ) as source_prep:
@@ -921,7 +895,7 @@
                     source_prep,
                     memory_map=memory_map,
                     columns=columns,
-                    **kwargs,
+                    **pyarrow_options,
                 )
             )

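In `read_parquet`, the catch-all `**kwargs` becomes an explicit `pyarrow_options` dict that is unpacked into `pyarrow.parquet.read_table`. A usage sketch under the new signature (the file name and filter are illustrative, not taken from this commit):

import polars as pl

# pyarrow_options is forwarded verbatim to pyarrow.parquet.read_table,
# so it only takes effect together with use_pyarrow=True.
df = pl.read_parquet(
    "data.parquet",
    use_pyarrow=True,
    pyarrow_options={"filters": [("year", ">=", 2020)]},
)
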
4 changes: 2 additions & 2 deletions py-polars/tests/io/test_lazy_csv.py
@@ -28,8 +28,8 @@ def test_invalid_utf8() -> None:
     with open(file, "wb") as f:
         f.write(bts)
 
-    a = pl.read_csv(file, has_headers=False, encoding="utf8-lossy")
-    b = pl.scan_csv(file, has_headers=False, encoding="utf8-lossy").collect()
+    a = pl.read_csv(file, has_header=False, encoding="utf8-lossy")
+    b = pl.scan_csv(file, has_header=False, encoding="utf8-lossy").collect()
     assert a.frame_equal(b, null_equal=True)
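
The updated test exercises the new spelling only; the warning itself is not asserted anywhere in this commit. A sketch of such a check (test name and CSV payload are invented) could use `pytest.warns`:

import pytest
import polars as pl


def test_deprecated_has_headers_warns(tmp_path) -> None:
    # Minimal CSV; the payload only needs to parse.
    file = tmp_path / "data.csv"
    file.write_text("1,a\n2,b\n")

    with pytest.warns(DeprecationWarning):
        pl.read_csv(file, has_headers=False)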

