Docstring lints & improvements (#4155)
stinodego committed Jul 26, 2022
1 parent dcb0806 commit e2dacbf
Showing 3 changed files with 48 additions and 35 deletions.
10 changes: 8 additions & 2 deletions py-polars/.flake8
@@ -1,7 +1,13 @@
 [flake8]
-# Satisfy black: https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8
 max-line-length = 88
-extend-ignore = E203
+docstring-convention=all
+extend-ignore =
+    # Satisfy black: https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8
+    E203,
+    # pydocstyle: http://www.pydocstyle.org/en/stable/error_codes.html
+    # numpy convention with D413 (Missing blank line after last section)
+    D107, D203, D212, D402, D415, D416
 
 per-file-ignores =
     __init__.py:F401
     tests/*.py: E101, W191
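
For reference, a docstring that satisfies this configuration follows the numpy convention with D413 still enforced: a blank line is required after the last section, while D107, D203, D212, D402, D415 and D416 are ignored. A minimal sketch (the function itself is hypothetical, not part of this commit):

    def clip(values: list[float], lo: float, hi: float) -> list[float]:
        """
        Clip all values to the closed interval [lo, hi].

        Parameters
        ----------
        values
            The numbers to clip.
        lo
            Lower bound of the interval.
        hi
            Upper bound of the interval.

        Returns
        -------
        list[float]

        """
        return [min(max(v, lo), hi) for v in values]

The blank line before the closing quotes is the D413 requirement the comment refers to; it is also why several hunks below move a blank line to just before the closing """ of a docstring.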
29 changes: 15 additions & 14 deletions py-polars/polars/io.py
@@ -1,3 +1,4 @@
"""Functions for reading and writing data."""
from __future__ import annotations

from io import BytesIO, IOBase, StringIO
@@ -49,7 +50,7 @@ def _check_arg_is_1byte(
     )
 
 
-def update_columns(df: DataFrame, new_columns: list[str]) -> DataFrame:
+def _update_columns(df: DataFrame, new_columns: list[str]) -> DataFrame:
     if df.width > len(new_columns):
         cols = df.columns
         for i, name in enumerate(new_columns):
@@ -192,7 +193,6 @@ def read_csv(
     scan_csv : Lazily read from a CSV file or multiple files via glob patterns.
     """
-
     # Map legacy arguments to current ones and remove them from kwargs.
     has_header = kwargs.pop("has_headers", has_header)
     dtypes = kwargs.pop("dtype", dtypes)
@@ -273,7 +273,7 @@ def read_csv(

         df = cast(DataFrame, from_arrow(tbl, rechunk))
         if new_columns:
-            return update_columns(df, new_columns)
+            return _update_columns(df, new_columns)
         return df
 
     if projection and dtypes and isinstance(dtypes, list):
@@ -395,7 +395,7 @@ def read_csv(
     )
 
     if new_columns:
-        return update_columns(df, new_columns)
+        return _update_columns(df, new_columns)
     return df


@@ -541,7 +541,6 @@ def scan_csv(
     └─────────┴──────────┘
     """
-
     # Map legacy arguments to current ones and remove them from kwargs.
     has_header = kwargs.pop("has_headers", has_header)
     dtypes = kwargs.pop("dtype", dtypes)
@@ -614,8 +613,8 @@ def scan_ipc(
         Extra options that make sense for ``fsspec.open()`` or a
         particular storage connection.
         e.g. host, port, username, password, etc.
-    """
 
+    """
     # Map legacy arguments to current ones and remove them from kwargs.
     n_rows = kwargs.pop("stop_after_n_rows", n_rows)

@@ -673,8 +672,8 @@ def scan_parquet(
         e.g. host, port, username, password, etc.
     low_memory: bool
         Reduce memory pressure at the expense of performance.
-    """
 
+    """
     # Map legacy arguments to current ones and remove them from kwargs.
     n_rows = kwargs.pop("stop_after_n_rows", n_rows)

@@ -716,6 +715,7 @@ def read_avro(
     Returns
     -------
     DataFrame
+
     """
     if isinstance(file, (str, Path)):
         file = format_path(file)
@@ -770,8 +770,8 @@ def read_ipc(
     Returns
     -------
     DataFrame
-    """
 
+    """
     # Map legacy arguments to current ones and remove them from kwargs.
     n_rows = kwargs.pop("stop_after_n_rows", n_rows)

@@ -862,8 +862,8 @@ def read_parquet(
     Returns
     -------
     DataFrame
-    """  # noqa: E501
 
+    """  # noqa: E501
     # Map legacy arguments to current ones and remove them from kwargs.
     n_rows = kwargs.pop("stop_after_n_rows", n_rows)

@@ -913,6 +913,7 @@ def read_json(source: str | IOBase, json_lines: bool = False) -> DataFrame:
         Path to a file or a file-like object.
     json_lines
         Toggle between "JSON" and "NDJSON" format
+
     """
     return DataFrame._read_json(source, json_lines)

@@ -1017,9 +1018,10 @@ def read_excel(
     read_csv_options: dict | None = None,
 ) -> DataFrame:
     """
-    Read Excel (XLSX) sheet into a DataFrame by converting an Excel sheet with
-    ``xlsx2csv.Xlsx2csv().convert()`` to CSV and parsing the CSV output with
-    :func:`read_csv`.
+    Read Excel (XLSX) sheet into a DataFrame.
+
+    Converts an Excel sheet with ``xlsx2csv.Xlsx2csv().convert()`` to CSV and parses the
+    CSV output with :func:`read_csv`.
 
     Parameters
     ----------
@@ -1088,8 +1090,8 @@ def read_excel(
     >>> excel_file = "test.xlsx"
     >>> pl.from_pandas(pd.read_excel(excel_file))  # doctest: +SKIP
-    """
 
+    """
     try:
         import xlsx2csv  # type: ignore[import]
     except ImportError:
@@ -1155,5 +1157,4 @@ def scan_ds(ds: pa.dataset.dataset) -> LazyFrame:
     └───────┴────────┴────────────┘
     """
-
     return _scan_ds(ds)
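
An aside on the recurring kwargs.pop lines in this file: each one maps a deprecated keyword onto its current parameter before the function body runs. A minimal sketch of the pattern, using an assumed reader function (the name read_thing is illustrative, not from this commit):

    from __future__ import annotations

    from typing import Any


    def read_thing(path: str, n_rows: int | None = None, **kwargs: Any) -> None:
        # Map legacy arguments to current ones and remove them from kwargs.
        # If a caller still passes the old keyword, pop() returns its value;
        # otherwise the value bound to the current parameter is kept.
        n_rows = kwargs.pop("stop_after_n_rows", n_rows)
        print(path, n_rows)


    read_thing("data.ipc", stop_after_n_rows=100)  # behaves like n_rows=100

The deprecated_alias decorator changed in utils.py below automates the same renaming for the argument pairs it is given.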
44 changes: 25 additions & 19 deletions py-polars/polars/utils.py
@@ -1,3 +1,4 @@
"""Utility functions."""
from __future__ import annotations

import ctypes
@@ -43,6 +44,7 @@ def _process_null_values(
 # https://stackoverflow.com/questions/4355524/getting-data-from-ctypes-array-into-numpy
 def _ptr_to_numpy(ptr: int, len: int, ptr_type: Any) -> np.ndarray:
     """
+    Create a memory block view as a numpy array.
 
     Parameters
     ----------
@@ -70,17 +72,17 @@ def _timedelta_to_pl_duration(td: timedelta) -> str:


 def in_nanoseconds_window(dt: datetime) -> bool:
+    """Check whether the given datetime can be represented as a Unix timestamp."""
     return 1386 < dt.year < 2554
 
 
 def timedelta_in_nanoseconds_window(td: timedelta) -> bool:
+    """Check whether the given timedelta can be represented as a Unix timestamp."""
     return in_nanoseconds_window(datetime(1970, 1, 1) + td)
 
 
 def _datetime_to_pl_timestamp(dt: datetime, tu: str | None) -> int:
-    """
-    Converts a python datetime to a timestamp in nanoseconds
-    """
+    """Convert a python datetime to a timestamp in nanoseconds."""
     if tu == "ns":
         return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e9)
     elif tu == "us":
@@ -119,32 +121,35 @@ def is_str_sequence(
     val: Sequence[object], allow_str: bool = False
 ) -> TypeGuard[Sequence[str]]:
     """
-    Checks that `val` is a sequence of strings. Note that a single string is a sequence
-    of strings by definition, use `allow_str=False` to return False on a single string
+    Check that `val` is a sequence of strings.
+
+    Note that a single string is a sequence of strings by definition, use
+    `allow_str=False` to return False on a single string.
     """
     if (not allow_str) and isinstance(val, str):
         return False
     return _is_iterable_of(val, Sequence, str)
 
 
 def is_int_sequence(val: Sequence[object]) -> TypeGuard[Sequence[int]]:
+    """Check whether the given sequence is a sequence of integers."""
     return _is_iterable_of(val, Sequence, int)
 
 
 def _is_iterable_of(val: Iterable, itertype: type, eltype: type) -> bool:
+    """Check whether the given iterable is of a certain type."""
     return isinstance(val, itertype) and all(isinstance(x, eltype) for x in val)
 
 
 def range_to_slice(rng: range) -> slice:
-    """
-    Return the given range as an equivalent slice.
-    """
+    """Return the given range as an equivalent slice."""
     return slice(rng.start, rng.stop, rng.step)
 
 
 def handle_projection_columns(
     columns: list[str] | list[int] | None,
 ) -> tuple[list[int] | None, list[str] | None]:
+    """Disambiguates between columns specified as integers vs. strings."""
     projection: list[int] | None = None
     if columns:
         if is_int_sequence(columns):
@@ -242,23 +247,20 @@ def _in_notebook() -> bool:


 def format_path(path: str | Path) -> str:
-    """
-    Returns a string path, expanding the home directory if present.
-    """
+    """Create a string path, expanding the home directory if present."""
     return os.path.expanduser(path)
 
 
 def threadpool_size() -> int:
-    """
-    Get the size of polars; thread pool
-    """
+    """Get the size of polars; thread pool."""
     return _pool_size()
 
 
 def deprecated_alias(**aliases: str) -> Callable:
-    """Decorator for deprecated function and method arguments.
+    """
+    Deprecate a function or method argument.
 
-    Use as follows:
+    Decorator for deprecated function and method arguments. Use as follows:
 
         @deprecated_alias(old_arg='new_arg')
         def myfunc(new_arg):
@@ -268,18 +270,22 @@ def myfunc(new_arg):
     def deco(f: Callable) -> Callable:
         @functools.wraps(f)
         def wrapper(*args: Any, **kwargs: Any) -> Callable:
-            rename_kwargs(f.__name__, kwargs, aliases)
+            _rename_kwargs(f.__name__, kwargs, aliases)
             return f(*args, **kwargs)
 
         return wrapper
 
     return deco
 
 
-def rename_kwargs(
+def _rename_kwargs(
     func_name: str, kwargs: dict[str, str], aliases: dict[str, str]
 ) -> None:
-    """Helper function for deprecating function and method arguments."""
+    """
+    Rename the keyword arguments of a function.
+
+    Helper function for deprecating function and method arguments.
+    """
     for alias, new in aliases.items():
         if alias in kwargs:
             if new in kwargs:
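
The body of _rename_kwargs is cut off above. To illustrate what the decorator pair accomplishes end to end, here is a self-contained sketch in the same spirit; the warning message and the choice of DeprecationWarning and TypeError are assumptions, not taken from this diff:

    from __future__ import annotations

    import functools
    import warnings
    from typing import Any, Callable


    def deprecated_alias(**aliases: str) -> Callable:
        """Deprecate a function or method argument (sketch of the decorator above)."""

        def deco(f: Callable) -> Callable:
            @functools.wraps(f)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                _rename_kwargs(f.__name__, kwargs, aliases)
                return f(*args, **kwargs)

            return wrapper

        return deco


    def _rename_kwargs(
        func_name: str, kwargs: dict[str, Any], aliases: dict[str, str]
    ) -> None:
        # Rewrite deprecated keyword names to their current equivalents in place.
        for alias, new in aliases.items():
            if alias in kwargs:
                if new in kwargs:
                    raise TypeError(
                        f"{func_name} received both {alias!r} and {new!r}"
                    )
                warnings.warn(
                    f"{alias!r} is deprecated, use {new!r} instead.",
                    DeprecationWarning,
                )
                kwargs[new] = kwargs.pop(alias)


    @deprecated_alias(has_headers="has_header")
    def read_csv_stub(has_header: bool = True) -> bool:
        return has_header


    assert read_csv_stub(has_headers=False) is False  # warns, then forwards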
