Skip to content

Commit

Permalink
docs[python]: Fix some docstring formatting (#4601)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Aug 29, 2022
1 parent 164936a commit 59e61db
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 59 deletions.
90 changes: 55 additions & 35 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ def wrap_df(df: PyDataFrame) -> DataFrame:

class DataFrame:
"""
A DataFrame is a two-dimensional data structure that represents data as a table
with rows and columns.
Two-dimensional data structure representing data as a table with rows and columns.
Parameters
----------
Expand Down Expand Up @@ -312,8 +311,9 @@ def __init__(

def estimated_size(self, unit: SizeUnit = "b") -> int | float:
"""
Return an estimation of the total (heap) allocated size of the `DataFrame` in
bytes (pass `unit` to return estimated size in kilobytes, megabytes, etc)..
Return an estimation of the total (heap) allocated size of the `DataFrame`.
Estimated size is given in the specified unit (bytes by default).
This estimation is the sum of the size of its buffers, validity, including
nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
Expand Down Expand Up @@ -773,8 +773,9 @@ def _read_ipc(
memory_map: bool = True,
) -> DataFrame:
"""
Read into a DataFrame from Arrow IPC stream format. This is also called the
Feather (v2) format.
Read into a DataFrame from Arrow IPC stream format.
Arrow IPC is also known as Feather (v2).
Parameters
----------
Expand Down Expand Up @@ -842,9 +843,12 @@ def _read_json(
json_lines: bool = False,
) -> DF:
"""
Read into a DataFrame from JSON format.
See Also
--------
read_json
"""
if isinstance(file, StringIO):
file = BytesIO(file.getvalue().encode())
Expand All @@ -858,6 +862,7 @@ def _read_json(
def to_arrow(self) -> pa.Table:
"""
Collect the underlying arrow arrays in an Arrow Table.
This operation is mostly zero copy.
Data types that do copy:
Expand Down Expand Up @@ -1064,6 +1069,7 @@ def to_pandas(
) -> pd.DataFrame:
"""
Cast to a pandas DataFrame.
This requires that pandas and pyarrow are installed.
This operation clones data.
Expand Down Expand Up @@ -1450,7 +1456,8 @@ def write_parquet(

def to_numpy(self) -> np.ndarray[Any, Any]:
"""
Convert DataFrame to a 2d numpy array.
Convert DataFrame to a 2D NumPy array.
This operation clones data.
Notes
Expand Down Expand Up @@ -2322,7 +2329,7 @@ def dtypes(self) -> list[type[DataType]]:
@property
def schema(self) -> dict[str, type[DataType]]:
"""
Get a dict[column name, DataType]
Get a dict[column name, DataType].
Examples
--------
Expand Down Expand Up @@ -2973,18 +2980,14 @@ def groupby_rolling(
by: str | list[str] | pli.Expr | list[pli.Expr] | None = None,
) -> RollingGroupBy[DF]:
"""
Create rolling groups based on a time column (or index value of type Int32,
Int64).
Create rolling groups based on a time column.
Also works for index values of type Int32 or Int64.
Different from a rolling groupby the windows are now determined by the
individual values and are not of constant intervals. For constant intervals use
*groupby_dynamic*
.. seealso::
groupby_dynamic
The `period` and `offset` arguments are created with
the following string language:
Expand Down Expand Up @@ -3028,6 +3031,10 @@ def groupby_rolling(
by
Also group by this column/these columns
See Also
--------
groupby_dynamic
Examples
--------
>>> dates = [
Expand Down Expand Up @@ -3505,8 +3512,10 @@ def join_asof(
force_parallel: bool = False,
) -> DataFrame:
"""
Perform an asof join. This is similar to a left-join except that we
match on nearest key rather than equal keys.
Perform an asof join.
This is similar to a left-join except that we match on nearest key rather than
equal keys.
Both DataFrames must be sorted by the asof_join key.
Expand Down Expand Up @@ -3764,8 +3773,9 @@ def apply(
inference_size: int = 256,
) -> DF:
"""
Apply a custom function over the rows of the DataFrame. The rows are passed as
tuple.
Apply a custom function over the rows of the DataFrame.
The rows are passed as tuple.
Implementing logic using this .apply method is generally slower and more memory
intensive than implementing the same logic using the expression API because:
Expand Down Expand Up @@ -4148,8 +4158,9 @@ def drop_in_place(self, name: str) -> pli.Series:

def cleared(self: DF) -> DF:
"""
Create an empty copy of the current DataFrame, with identical schema but no
data.
Create an empty copy of the current DataFrame.
Returns a DataFrame with identical schema but no data.
See Also
--------
Expand Down Expand Up @@ -4560,8 +4571,9 @@ def melt(
value_name: str | None = None,
) -> DF:
"""
Unpivot a DataFrame from wide to long format, optionally leaving identifiers
set.
Unpivot a DataFrame from wide to long format.
Optionally leaves identifiers set.
This function is useful to massage a DataFrame into a format where one or more
columns are identifier variables (id_vars), while all other columns, considered
Expand Down Expand Up @@ -4737,14 +4749,17 @@ def partition_by(

def shift(self: DF, periods: int) -> DF:
"""
Shift the values by a given period and fill the parts that will be empty due to
this operation with `Nones`.
Shift values by the given period.
Parameters
----------
periods
Number of places to shift (may be negative).
See Also
--------
shift_and_fill
Examples
--------
>>> df = pl.DataFrame(
Expand Down Expand Up @@ -4786,8 +4801,10 @@ def shift(self: DF, periods: int) -> DF:

def shift_and_fill(self, periods: int, fill_value: int | str | float) -> DataFrame:
"""
Shift the values by a given period and fill the parts that will be empty due to
this operation with the result of the `fill_value` expression.
Shift values by the given period and fill resulting null values.
Fill the parts that will be empty due to this operation with the result of the
`fill_value` expression.
Parameters
----------
Expand Down Expand Up @@ -5382,7 +5399,7 @@ def median(self: DF) -> DF:

def product(self) -> DataFrame:
"""
Aggregate the columns of this DataFrame to their product values
Aggregate the columns of this DataFrame to their product values.
Examples
--------
Expand Down Expand Up @@ -5636,9 +5653,11 @@ def fold(
self, operation: Callable[[pli.Series, pli.Series], pli.Series]
) -> pli.Series:
"""
Apply a horizontal reduction on a DataFrame. This can be used to effectively
determine aggregations on a row level, and can be applied to any DataType that
can be supercasted (casted to a similar parent type).
Apply a horizontal reduction on a DataFrame.
This can be used to effectively determine aggregations on a row level, and can
be applied to any DataType that can be supercasted (casted to a similar parent
type).
An example of the supercast rules when applying an arithmetic operation on two
DataTypes are for instance:
Expand Down Expand Up @@ -5782,8 +5801,9 @@ def shrink_to_fit(self: DF, in_place: bool) -> DF | None:

def shrink_to_fit(self: DF, in_place: bool = False) -> DF | None:
"""
Shrink memory usage of this DataFrame to fit the exact capacity needed to hold
the data.
Shrink DataFrame memory usage.
Shrinks to fit the exact capacity needed to hold the data.
"""
if in_place:
self._df.shrink_to_fit()
Expand Down Expand Up @@ -5897,7 +5917,7 @@ def interpolate(self) -> DataFrame:

def is_empty(self) -> bool:
"""
Check if the dataframe is empty
Check if the dataframe is empty.
Examples
--------
Expand All @@ -5912,7 +5932,7 @@ def is_empty(self) -> bool:

def to_struct(self, name: str) -> pli.Series:
"""
Convert a ``DataFrame`` to a ``Series`` of type ``Struct``
Convert a ``DataFrame`` to a ``Series`` of type ``Struct``.
Parameters
----------
Expand Down
41 changes: 27 additions & 14 deletions py-polars/polars/internals/dataframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,15 @@ def _select_all(self) -> GBSelection[DF]:

def _groups(self) -> DF: # pragma: no cover
"""
Return a `DataFrame` with:
Get keys and group indices for each group in the groupby.
* the groupby keys
* the group indexes aggregated as lists
Returns
-------
DataFrame
A DataFrame with:
- the groupby keys
- the group indexes aggregated as lists
Examples
--------
Expand Down Expand Up @@ -249,8 +254,9 @@ def apply(self, f: Callable[[pli.DataFrame], pli.DataFrame]) -> DF:

def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:
"""
Use multiple aggregations on columns. This can be combined with complete lazy
API and is considered idiomatic polars.
Use multiple aggregations on columns.
This can be combined with complete lazy API and is considered idiomatic polars.
Parameters
----------
Expand Down Expand Up @@ -425,12 +431,10 @@ def pivot(
self, pivot_column: str | list[str], values_column: str | list[str]
) -> PivotOps[DF]:
"""
Do a pivot operation based on the group key, a pivot column and an aggregation
function on the values column.
Do a pivot operation.
.. note::
Polars'/arrow memory is not ideal for transposing operations like pivots.
If you have a relatively large table, consider using a groupby over a pivot.
The pivot operation is based on the group key, a pivot column and an aggregation
function on the values column.
Parameters
----------
Expand All @@ -439,6 +443,11 @@ def pivot(
values_column
Column that will be aggregated.
Notes
-----
Polars'/arrow memory is not ideal for transposing operations like pivots.
If you have a relatively large table, consider using a groupby over a pivot.
Examples
--------
>>> df = pl.DataFrame(
Expand Down Expand Up @@ -814,8 +823,10 @@ def agg_list(self) -> pli.DataFrame:

class RollingGroupBy(Generic[DF]):
"""
A rolling grouper. This has an `.agg` method which will allow you to run all polars
expressions in a groupby context.
A rolling grouper.
This has an `.agg` method which will allow you to run all polars expressions in a
groupby context.
"""

def __init__(
Expand Down Expand Up @@ -847,8 +858,10 @@ def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:

class DynamicGroupBy(Generic[DF]):
"""
A dynamic grouper. This has an `.agg` method which will allow you to run all polars
expressions in a groupby context.
A dynamic grouper.
This has an `.agg` method which will allow you to run all polars expressions in a
groupby context.
"""

def __init__(
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ def nanosecond(self) -> pli.Expr:

def epoch(self, tu: EpochTimeUnit = "us") -> pli.Expr:
"""
Get the time passed since the Unix EPOCH in the give time unit
Get the time passed since the Unix EPOCH in the given time unit.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def field(self, name: str) -> pli.Expr:

def rename_fields(self, names: list[str]) -> pli.Expr:
"""
Rename the fields of the struct
Rename the fields of the struct.
Parameters
----------
Expand Down
3 changes: 1 addition & 2 deletions py-polars/tests/run_doc_examples.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""
Run all doctest examples inside the `polars` module using Python's built-in doctest
module.
Run all doctest examples of the `polars` module using Python's built-in doctest module.
How to check examples: run this script, if exits with code 0, all is good. Otherwise,
the errors will be reported.
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

@pytest.fixture()
def environ() -> Iterator[None]:
"""Fixture to restore the environment variables after the test"""
"""Fixture to restore the environment variables after the test."""
old_environ = dict(os.environ)
yield
os.environ.clear()
Expand Down
6 changes: 2 additions & 4 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -2095,10 +2095,8 @@ def test_indexing_set() -> None:


def test_set() -> None:
"""
Setting a dataframe using indices is deprecated. We keep these tests because we
only generate a warning
"""
# Setting a dataframe using indices is deprecated.
# We keep these tests because we only generate a warning.
np.random.seed(1)
df = pl.DataFrame(
{"foo": np.random.rand(10), "bar": np.arange(10), "ham": ["h"] * 10}
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,7 +1954,7 @@ def test_repr() -> None:
assert str(n) in s_repr

class XSeries(pl.Series):
"""Custom Series class"""
"""Custom Series class."""

# check custom class name reflected in repr output
x = XSeries("ints", [1001, 2002, 3003])
Expand Down

0 comments on commit 59e61db

Please sign in to comment.