fix[python]: raise DeprecationWarning on deprecated arguments (#4470)
matteosantama committed Aug 18, 2022
1 parent 200c6ed commit 5ee5da3
Showing 2 changed files with 24 additions and 50 deletions.
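
The change replaces the per-function `kwargs.pop(...)` plumbing with a single `@deprecated_alias` decorator imported from `polars.utils`. The decorator's implementation is not part of this diff; a minimal sketch of how such a decorator can remap legacy keyword names and emit a `DeprecationWarning` (names and message text here are assumptions, not the shipped code) might look like:

import warnings
from functools import wraps
from typing import Any, Callable, TypeVar

T = TypeVar("T")


def deprecated_alias(**aliases: str) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Map deprecated keyword arguments onto their current names."""

    def deco(fn: Callable[..., T]) -> Callable[..., T]:
        @wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            for old, new in aliases.items():
                if old in kwargs:
                    # Warn, then forward the value under the new name.
                    warnings.warn(
                        f"`{old}` is deprecated as an argument to "
                        f"`{fn.__name__}`; use `{new}` instead.",
                        DeprecationWarning,
                        stacklevel=2,
                    )
                    kwargs[new] = kwargs.pop(old)
            return fn(*args, **kwargs)

        return wrapper

    return deco

With this in place a call such as `pl.read_csv(..., has_headers=False)` keeps working but warns callers to switch to `has_header`.
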
70 changes: 22 additions & 48 deletions py-polars/polars/io.py
@@ -3,9 +3,9 @@
 
 from io import BytesIO, IOBase, StringIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Mapping, TextIO
+from typing import TYPE_CHECKING, BinaryIO, Callable, Mapping, TextIO
 
-from polars.utils import format_path, handle_projection_columns
+from polars.utils import deprecated_alias, format_path, handle_projection_columns
 
 try:
     import pyarrow as pa
@@ -63,6 +63,12 @@ def _update_columns(df: DataFrame, new_columns: list[str]) -> DataFrame:
     return df
 
 
+@deprecated_alias(
+    has_headers="has_header",
+    dtype="dtypes",
+    stop_after_n_rows="n_rows",
+    projection="columns",
+)
 def read_csv(
     file: str | TextIO | BytesIO | Path | BinaryIO | bytes,
     has_header: bool = True,
Expand Down Expand Up @@ -90,7 +96,6 @@ def read_csv(
row_count_offset: int = 0,
sample_size: int = 1024,
eol_char: str = "\n",
**kwargs: Any,
) -> DataFrame:
"""
Read a CSV file into a DataFrame.
@@ -200,14 +205,6 @@ def read_csv(
     scan_csv : Lazily read from a CSV file or multiple files via glob patterns.
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    has_header = kwargs.pop("has_headers", has_header)
-    dtypes = kwargs.pop("dtype", dtypes)
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
-    if columns is None:
-        columns = kwargs.pop("projection", None)
-
     _check_arg_is_1byte("sep", sep, False)
     _check_arg_is_1byte("comment_char", comment_char, False)
     _check_arg_is_1byte("quote_char", quote_char, True)
@@ -415,6 +412,7 @@ def read_csv(
     return df
 
 
+@deprecated_alias(has_headers="has_header", dtype="dtypes", stop_after_n_rows="n_rows")
 def scan_csv(
     file: str | Path,
     has_header: bool = True,
@@ -437,7 +435,6 @@ def scan_csv(
     row_count_offset: int = 0,
     parse_dates: bool = False,
     eol_char: str = "\n",
-    **kwargs: Any,
 ) -> LazyFrame:
     """
     Lazily read from a CSV file or multiple files via glob patterns.
@@ -560,11 +557,6 @@
     └─────────┴──────────┘
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    has_header = kwargs.pop("has_headers", has_header)
-    dtypes = kwargs.pop("dtype", dtypes)
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
     _check_arg_is_1byte("sep", sep, False)
     _check_arg_is_1byte("comment_char", comment_char, False)
     _check_arg_is_1byte("quote_char", quote_char, True)
@@ -597,6 +589,7 @@
     )
 
 
+@deprecated_alias(stop_after_n_rows="n_rows")
 def scan_ipc(
     file: str | Path,
     n_rows: int | None = None,
@@ -606,7 +599,6 @@
     row_count_offset: int = 0,
     storage_options: dict[str, object] | None = None,
     memory_map: bool = True,
-    **kwargs: Any,
 ) -> LazyFrame:
     """
     Lazily read from an Arrow IPC (Feather v2) file or multiple files via glob patterns.
@@ -639,9 +631,6 @@
         Only uncompressed IPC files can be memory mapped.
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
     return LazyFrame.scan_ipc(
         file=file,
         n_rows=n_rows,
@@ -654,6 +643,7 @@
     )
 
 
+@deprecated_alias(stop_after_n_rows="n_rows")
 def scan_parquet(
     file: str | Path,
     n_rows: int | None = None,
@@ -664,7 +654,6 @@
     row_count_offset: int = 0,
     storage_options: dict[str, object] | None = None,
     low_memory: bool = False,
-    **kwargs: Any,
 ) -> LazyFrame:
     """
     Lazily read from a parquet file or multiple files via glob patterns.
@@ -699,9 +688,6 @@
         Reduce memory pressure at the expense of performance.
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
     if isinstance(file, (str, Path)):
         file = format_path(file)
 
@@ -718,11 +704,11 @@
     )
 
 
+@deprecated_alias(projection="columns")
 def read_avro(
     file: str | Path | BytesIO | BinaryIO,
     columns: list[int] | list[str] | None = None,
     n_rows: int | None = None,
-    **kwargs: Any,
 ) -> DataFrame:
     """
     Read into a DataFrame from Apache Avro format.
@@ -744,12 +730,11 @@
     """
     if isinstance(file, (str, Path)):
         file = format_path(file)
-    if columns is None:
-        columns = kwargs.pop("projection", None)
 
     return DataFrame._read_avro(file, n_rows=n_rows, columns=columns)
 
 
+@deprecated_alias(stop_after_n_rows="n_rows", projection="columns")
 def read_ipc(
     file: str | BinaryIO | BytesIO | Path | bytes,
     columns: list[int] | list[str] | None = None,
@@ -760,7 +745,6 @@
     row_count_name: str | None = None,
     row_count_offset: int = 0,
     rechunk: bool = True,
-    **kwargs: Any,
 ) -> DataFrame:
     """
     Read into a DataFrame from Arrow IPC (Feather v2) file.
@@ -798,12 +782,6 @@
     DataFrame
     """
-    # Map legacy arguments to current ones and remove them from kwargs.
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
-    if columns is None:
-        columns = kwargs.pop("projection", None)
-
     if use_pyarrow:
         if n_rows and not memory_map:
             raise ValueError(
@@ -839,6 +817,7 @@
     )
 
 
+@deprecated_alias(stop_after_n_rows="n_rows", projection="columns")
 def read_parquet(
     source: str | Path | BinaryIO | BytesIO | bytes,
     columns: list[int] | list[str] | None = None,
@@ -850,7 +829,7 @@ def read_parquet(
     row_count_name: str | None = None,
     row_count_offset: int = 0,
     low_memory: bool = False,
-    **kwargs: Any,
+    pyarrow_options: dict[str, object] | None = None,
 ) -> DataFrame:
     """
     Read into a DataFrame from a parquet file.
@@ -886,26 +865,21 @@
         Offset to start the row_count column (only use if the name is set).
     low_memory
         Reduce memory pressure at the expense of performance.
-    **kwargs
-        kwargs for `pyarrow.parquet.read_table
+    pyarrow_options
+        Keyword arguments for `pyarrow.parquet.read_table
         <https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html>`_.
     Returns
     -------
     DataFrame
     """  # noqa: E501
-    # Map legacy arguments to current ones and remove them from kwargs.
-    n_rows = kwargs.pop("stop_after_n_rows", n_rows)
-
-    if columns is None:
-        columns = kwargs.pop("projection", None)
-
-    if use_pyarrow:
-        if n_rows:
-            raise ValueError("``n_rows`` cannot be used with ``use_pyarrow=True``.")
+    if use_pyarrow and n_rows:
+        raise ValueError("``n_rows`` cannot be used with ``use_pyarrow=True``.")
 
     storage_options = storage_options or {}
+    pyarrow_options = pyarrow_options or {}
 
     with _prepare_file_arg(
         source, use_pyarrow=use_pyarrow, **storage_options
     ) as source_prep:
@@ -921,7 +895,7 @@
                     source_prep,
                     memory_map=memory_map,
                     columns=columns,
-                    **kwargs,
+                    **pyarrow_options,
                 )
             )

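In `read_parquet`, the catch-all `**kwargs` becomes an explicit `pyarrow_options` dict that is unpacked into `pyarrow.parquet.read_table`. A usage sketch under the new signature (the file name and filter are illustrative, not taken from this commit):

import polars as pl

# pyarrow_options is forwarded verbatim to pyarrow.parquet.read_table,
# so it only takes effect together with use_pyarrow=True.
df = pl.read_parquet(
    "data.parquet",
    use_pyarrow=True,
    pyarrow_options={"filters": [("year", ">=", 2020)]},
)
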
4 changes: 2 additions & 2 deletions py-polars/tests/io/test_lazy_csv.py
@@ -28,8 +28,8 @@ def test_invalid_utf8() -> None:
     with open(file, "wb") as f:
         f.write(bts)
 
-    a = pl.read_csv(file, has_headers=False, encoding="utf8-lossy")
-    b = pl.scan_csv(file, has_headers=False, encoding="utf8-lossy").collect()
+    a = pl.read_csv(file, has_header=False, encoding="utf8-lossy")
+    b = pl.scan_csv(file, has_header=False, encoding="utf8-lossy").collect()
     assert a.frame_equal(b, null_equal=True)
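
The updated test exercises the new spelling only; the warning itself is not asserted anywhere in this commit. A sketch of such a check (test name and CSV payload are invented) could use `pytest.warns`:

import pytest
import polars as pl


def test_deprecated_has_headers_warns(tmp_path) -> None:
    # Minimal CSV; the payload only needs to parse.
    file = tmp_path / "data.csv"
    file.write_text("1,a\n2,b\n")

    with pytest.warns(DeprecationWarning):
        pl.read_csv(file, has_headers=False)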

