Docstring lints & improvements (#4155)
stinodego committed Jul 26, 2022
1 parent dcb0806 commit e2dacbf
Showing 3 changed files with 48 additions and 35 deletions.
10 changes: 8 additions & 2 deletions py-polars/.flake8
@@ -1,7 +1,13 @@
 [flake8]
-# Satisfy black: https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8
 max-line-length = 88
-extend-ignore = E203
+docstring-convention=all
+extend-ignore =
+    # Satisfy black: https://black.readthedocs.io/en/stable/guides/using_black_with_other_tools.html#flake8
+    E203,
+    # pydocstyle: http://www.pydocstyle.org/en/stable/error_codes.html
+    # numpy convention with D413 (Missing blank line after last section)
+    D107, D203, D212, D402, D415, D416
 
 per-file-ignores =
     __init__.py:F401
     tests/*.py: E101, W191
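
For reference, a docstring that satisfies this configuration follows the numpy convention with D413 still enforced: a blank line is required after the last section, while D107, D203, D212, D402, D415 and D416 are ignored. A minimal sketch (the function itself is hypothetical, not part of this commit):

    def clip(values: list[float], lo: float, hi: float) -> list[float]:
        """
        Clip all values to the closed interval [lo, hi].

        Parameters
        ----------
        values
            The numbers to clip.
        lo
            Lower bound of the interval.
        hi
            Upper bound of the interval.

        Returns
        -------
        list[float]

        """
        return [min(max(v, lo), hi) for v in values]

The blank line before the closing quotes is the D413 requirement the comment refers to; it is also why several hunks below move a blank line to just before the closing """ of a docstring.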
29 changes: 15 additions & 14 deletions py-polars/polars/io.py
@@ -1,3 +1,4 @@
"""Functions for reading and writing data."""
from __future__ import annotations

from io import BytesIO, IOBase, StringIO
@@ -49,7 +50,7 @@ def _check_arg_is_1byte(
     )
 
 
-def update_columns(df: DataFrame, new_columns: list[str]) -> DataFrame:
+def _update_columns(df: DataFrame, new_columns: list[str]) -> DataFrame:
     if df.width > len(new_columns):
         cols = df.columns
         for i, name in enumerate(new_columns):
@@ -192,7 +193,6 @@ def read_csv(
     scan_csv : Lazily read from a CSV file or multiple files via glob patterns.
     """
-
     # Map legacy arguments to current ones and remove them from kwargs.
     has_header = kwargs.pop("has_headers", has_header)
     dtypes = kwargs.pop("dtype", dtypes)
@@ -273,7 +273,7 @@ def read_csv(

         df = cast(DataFrame, from_arrow(tbl, rechunk))
         if new_columns:
-            return update_columns(df, new_columns)
+            return _update_columns(df, new_columns)
         return df
 
     if projection and dtypes and isinstance(dtypes, list):
@@ -395,7 +395,7 @@ def read_csv(
     )
 
     if new_columns:
-        return update_columns(df, new_columns)
+        return _update_columns(df, new_columns)
     return df


@@ -541,7 +541,6 @@ def scan_csv(
     └─────────┴──────────┘
     """
-
     # Map legacy arguments to current ones and remove them from kwargs.
     has_header = kwargs.pop("has_headers", has_header)
     dtypes = kwargs.pop("dtype", dtypes)
@@ -614,8 +613,8 @@ def scan_ipc(
         Extra options that make sense for ``fsspec.open()`` or a
         particular storage connection.
         e.g. host, port, username, password, etc.
-    """
 
+    """
     # Map legacy arguments to current ones and remove them from kwargs.
     n_rows = kwargs.pop("stop_after_n_rows", n_rows)

@@ -673,8 +672,8 @@ def scan_parquet(
         e.g. host, port, username, password, etc.
     low_memory: bool
         Reduce memory pressure at the expense of performance.
-    """
 
+    """
     # Map legacy arguments to current ones and remove them from kwargs.
     n_rows = kwargs.pop("stop_after_n_rows", n_rows)

@@ -716,6 +715,7 @@ def read_avro(
     Returns
     -------
     DataFrame
+
     """
     if isinstance(file, (str, Path)):
         file = format_path(file)
@@ -770,8 +770,8 @@ def read_ipc(
     Returns
     -------
     DataFrame
-    """
 
+    """
     # Map legacy arguments to current ones and remove them from kwargs.
     n_rows = kwargs.pop("stop_after_n_rows", n_rows)

@@ -862,8 +862,8 @@ def read_parquet(
     Returns
     -------
     DataFrame
-    """  # noqa: E501
 
+    """  # noqa: E501
     # Map legacy arguments to current ones and remove them from kwargs.
     n_rows = kwargs.pop("stop_after_n_rows", n_rows)

@@ -913,6 +913,7 @@ def read_json(source: str | IOBase, json_lines: bool = False) -> DataFrame:
         Path to a file or a file-like object.
     json_lines
         Toggle between "JSON" and "NDJSON" format
+
     """
     return DataFrame._read_json(source, json_lines)

@@ -1017,9 +1018,10 @@ def read_excel(
     read_csv_options: dict | None = None,
 ) -> DataFrame:
     """
-    Read Excel (XLSX) sheet into a DataFrame by converting an Excel sheet with
-    ``xlsx2csv.Xlsx2csv().convert()`` to CSV and parsing the CSV output with
-    :func:`read_csv`.
+    Read Excel (XLSX) sheet into a DataFrame.
+
+    Converts an Excel sheet with ``xlsx2csv.Xlsx2csv().convert()`` to CSV and parses the
+    CSV output with :func:`read_csv`.
 
     Parameters
     ----------
@@ -1088,8 +1090,8 @@ def read_excel(
     >>> excel_file = "test.xlsx"
     >>> pl.from_pandas(pd.read_excel(excel_file))  # doctest: +SKIP
-    """
 
+    """
     try:
         import xlsx2csv  # type: ignore[import]
     except ImportError:
@@ -1155,5 +1157,4 @@ def scan_ds(ds: pa.dataset.dataset) -> LazyFrame:
     └───────┴────────┴────────────┘
     """
-
     return _scan_ds(ds)
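
An aside on the recurring kwargs.pop lines in this file: each one maps a deprecated keyword onto its current parameter before the function body runs. A minimal sketch of the pattern, using an assumed reader function (the name read_thing is illustrative, not from this commit):

    from __future__ import annotations

    from typing import Any


    def read_thing(path: str, n_rows: int | None = None, **kwargs: Any) -> None:
        # Map legacy arguments to current ones and remove them from kwargs.
        # If a caller still passes the old keyword, pop() returns its value;
        # otherwise the value bound to the current parameter is kept.
        n_rows = kwargs.pop("stop_after_n_rows", n_rows)
        print(path, n_rows)


    read_thing("data.ipc", stop_after_n_rows=100)  # behaves like n_rows=100

The deprecated_alias decorator changed in utils.py below automates the same renaming for the argument pairs it is given.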
44 changes: 25 additions & 19 deletions py-polars/polars/utils.py
@@ -1,3 +1,4 @@
"""Utility functions."""
from __future__ import annotations

import ctypes
@@ -43,6 +44,7 @@ def _process_null_values(
 # https://stackoverflow.com/questions/4355524/getting-data-from-ctypes-array-into-numpy
 def _ptr_to_numpy(ptr: int, len: int, ptr_type: Any) -> np.ndarray:
     """
+    Create a memory block view as a numpy array.
 
     Parameters
     ----------
@@ -70,17 +72,17 @@ def _timedelta_to_pl_duration(td: timedelta) -> str:


 def in_nanoseconds_window(dt: datetime) -> bool:
+    """Check whether the given datetime can be represented as a Unix timestamp."""
     return 1386 < dt.year < 2554
 
 
 def timedelta_in_nanoseconds_window(td: timedelta) -> bool:
+    """Check whether the given timedelta can be represented as a Unix timestamp."""
     return in_nanoseconds_window(datetime(1970, 1, 1) + td)
 
 
 def _datetime_to_pl_timestamp(dt: datetime, tu: str | None) -> int:
-    """
-    Converts a python datetime to a timestamp in nanoseconds
-    """
+    """Convert a python datetime to a timestamp in nanoseconds."""
     if tu == "ns":
         return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e9)
     elif tu == "us":
@@ -119,32 +121,35 @@ def is_str_sequence(
     val: Sequence[object], allow_str: bool = False
 ) -> TypeGuard[Sequence[str]]:
     """
-    Checks that `val` is a sequence of strings. Note that a single string is a sequence
-    of strings by definition, use `allow_str=False` to return False on a single string
+    Check that `val` is a sequence of strings.
+
+    Note that a single string is a sequence of strings by definition, use
+    `allow_str=False` to return False on a single string.
     """
     if (not allow_str) and isinstance(val, str):
         return False
     return _is_iterable_of(val, Sequence, str)
 
 
 def is_int_sequence(val: Sequence[object]) -> TypeGuard[Sequence[int]]:
+    """Check whether the given sequence is a sequence of integers."""
     return _is_iterable_of(val, Sequence, int)
 
 
 def _is_iterable_of(val: Iterable, itertype: type, eltype: type) -> bool:
+    """Check whether the given iterable is of a certain type."""
     return isinstance(val, itertype) and all(isinstance(x, eltype) for x in val)
 
 
 def range_to_slice(rng: range) -> slice:
-    """
-    Return the given range as an equivalent slice.
-    """
+    """Return the given range as an equivalent slice."""
     return slice(rng.start, rng.stop, rng.step)
 
 
 def handle_projection_columns(
     columns: list[str] | list[int] | None,
 ) -> tuple[list[int] | None, list[str] | None]:
+    """Disambiguates between columns specified as integers vs. strings."""
     projection: list[int] | None = None
     if columns:
         if is_int_sequence(columns):
@@ -242,23 +247,20 @@ def _in_notebook() -> bool:


 def format_path(path: str | Path) -> str:
-    """
-    Returns a string path, expanding the home directory if present.
-    """
+    """Create a string path, expanding the home directory if present."""
     return os.path.expanduser(path)
 
 
 def threadpool_size() -> int:
-    """
-    Get the size of polars; thread pool
-    """
+    """Get the size of polars; thread pool."""
     return _pool_size()
 
 
 def deprecated_alias(**aliases: str) -> Callable:
-    """Decorator for deprecated function and method arguments.
+    """
+    Deprecate a function or method argument.
 
-    Use as follows:
+    Decorator for deprecated function and method arguments. Use as follows:
 
         @deprecated_alias(old_arg='new_arg')
         def myfunc(new_arg):
@@ -268,18 +270,22 @@ def myfunc(new_arg):
     def deco(f: Callable) -> Callable:
         @functools.wraps(f)
         def wrapper(*args: Any, **kwargs: Any) -> Callable:
-            rename_kwargs(f.__name__, kwargs, aliases)
+            _rename_kwargs(f.__name__, kwargs, aliases)
             return f(*args, **kwargs)
 
         return wrapper
 
     return deco
 
 
-def rename_kwargs(
+def _rename_kwargs(
     func_name: str, kwargs: dict[str, str], aliases: dict[str, str]
 ) -> None:
-    """Helper function for deprecating function and method arguments."""
+    """
+    Rename the keyword arguments of a function.
+
+    Helper function for deprecating function and method arguments.
+    """
     for alias, new in aliases.items():
         if alias in kwargs:
             if new in kwargs:
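
The body of _rename_kwargs is cut off above. To illustrate what the decorator pair accomplishes end to end, here is a self-contained sketch in the same spirit; the warning message and the choice of DeprecationWarning and TypeError are assumptions, not taken from this diff:

    from __future__ import annotations

    import functools
    import warnings
    from typing import Any, Callable


    def deprecated_alias(**aliases: str) -> Callable:
        """Deprecate a function or method argument (sketch of the decorator above)."""

        def deco(f: Callable) -> Callable:
            @functools.wraps(f)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                _rename_kwargs(f.__name__, kwargs, aliases)
                return f(*args, **kwargs)

            return wrapper

        return deco


    def _rename_kwargs(
        func_name: str, kwargs: dict[str, Any], aliases: dict[str, str]
    ) -> None:
        # Rewrite deprecated keyword names to their current equivalents in place.
        for alias, new in aliases.items():
            if alias in kwargs:
                if new in kwargs:
                    raise TypeError(
                        f"{func_name} received both {alias!r} and {new!r}"
                    )
                warnings.warn(
                    f"{alias!r} is deprecated, use {new!r} instead.",
                    DeprecationWarning,
                )
                kwargs[new] = kwargs.pop(alias)


    @deprecated_alias(has_headers="has_header")
    def read_csv_stub(has_header: bool = True) -> bool:
        return has_header


    assert read_csv_stub(has_headers=False) is False  # warns, then forwards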
