Skip to content

Commit

Permalink
docs[python]: Fix some docstring formatting (#4601)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Aug 29, 2022
1 parent 164936a commit 59e61db
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 59 deletions.
90 changes: 55 additions & 35 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ def wrap_df(df: PyDataFrame) -> DataFrame:

class DataFrame:
"""
A DataFrame is a two-dimensional data structure that represents data as a table
with rows and columns.
Two-dimensional data structure representing data as a table with rows and columns.
Parameters
----------
Expand Down Expand Up @@ -312,8 +311,9 @@ def __init__(

def estimated_size(self, unit: SizeUnit = "b") -> int | float:
"""
Return an estimation of the total (heap) allocated size of the `DataFrame` in
bytes (pass `unit` to return estimated size in kilobytes, megabytes, etc)..
Return an estimation of the total (heap) allocated size of the `DataFrame`.
Estimated size is given in the specified unit (bytes by default).
This estimation is the sum of the size of its buffers, validity, including
nested arrays. Multiple arrays may share buffers and bitmaps. Therefore, the
Expand Down Expand Up @@ -773,8 +773,9 @@ def _read_ipc(
memory_map: bool = True,
) -> DataFrame:
"""
Read into a DataFrame from Arrow IPC stream format. This is also called the
Feather (v2) format.
Read into a DataFrame from Arrow IPC stream format.
Arrow IPC is also known as Feather (v2).
Parameters
----------
Expand Down Expand Up @@ -842,9 +843,12 @@ def _read_json(
json_lines: bool = False,
) -> DF:
"""
Read into a DataFrame from JSON format.
See Also
--------
read_json
"""
if isinstance(file, StringIO):
file = BytesIO(file.getvalue().encode())
Expand All @@ -858,6 +862,7 @@ def _read_json(
def to_arrow(self) -> pa.Table:
"""
Collect the underlying arrow arrays in an Arrow Table.
This operation is mostly zero copy.
Data types that do copy:
Expand Down Expand Up @@ -1064,6 +1069,7 @@ def to_pandas(
) -> pd.DataFrame:
"""
Cast to a pandas DataFrame.
This requires that pandas and pyarrow are installed.
This operation clones data.
Expand Down Expand Up @@ -1450,7 +1456,8 @@ def write_parquet(

def to_numpy(self) -> np.ndarray[Any, Any]:
"""
Convert DataFrame to a 2d numpy array.
Convert DataFrame to a 2D NumPy array.
This operation clones data.
Notes
Expand Down Expand Up @@ -2322,7 +2329,7 @@ def dtypes(self) -> list[type[DataType]]:
@property
def schema(self) -> dict[str, type[DataType]]:
"""
Get a dict[column name, DataType]
Get a dict[column name, DataType].
Examples
--------
Expand Down Expand Up @@ -2973,18 +2980,14 @@ def groupby_rolling(
by: str | list[str] | pli.Expr | list[pli.Expr] | None = None,
) -> RollingGroupBy[DF]:
"""
Create rolling groups based on a time column (or index value of type Int32,
Int64).
Create rolling groups based on a time column.
Also works for index values of type Int32 or Int64.
Different from a rolling groupby the windows are now determined by the
individual values and are not of constant intervals. For constant intervals use
*groupby_dynamic*
.. seealso::
groupby_dynamic
The `period` and `offset` arguments are created with
the following string language:
Expand Down Expand Up @@ -3028,6 +3031,10 @@ def groupby_rolling(
by
Also group by this column/these columns
See Also
--------
groupby_dynamic
Examples
--------
>>> dates = [
Expand Down Expand Up @@ -3505,8 +3512,10 @@ def join_asof(
force_parallel: bool = False,
) -> DataFrame:
"""
Perform an asof join. This is similar to a left-join except that we
match on nearest key rather than equal keys.
Perform an asof join.
This is similar to a left-join except that we match on nearest key rather than
equal keys.
Both DataFrames must be sorted by the asof_join key.
Expand Down Expand Up @@ -3764,8 +3773,9 @@ def apply(
inference_size: int = 256,
) -> DF:
"""
Apply a custom function over the rows of the DataFrame. The rows are passed as
tuple.
Apply a custom function over the rows of the DataFrame.
The rows are passed as tuple.
Implementing logic using this .apply method is generally slower and more memory
intensive than implementing the same logic using the expression API because:
Expand Down Expand Up @@ -4148,8 +4158,9 @@ def drop_in_place(self, name: str) -> pli.Series:

def cleared(self: DF) -> DF:
"""
Create an empty copy of the current DataFrame, with identical schema but no
data.
Create an empty copy of the current DataFrame.
Returns a DataFrame with identical schema but no data.
See Also
--------
Expand Down Expand Up @@ -4560,8 +4571,9 @@ def melt(
value_name: str | None = None,
) -> DF:
"""
Unpivot a DataFrame from wide to long format, optionally leaving identifiers
set.
Unpivot a DataFrame from wide to long format.
Optionally leaves identifiers set.
This function is useful to massage a DataFrame into a format where one or more
columns are identifier variables (id_vars), while all other columns, considered
Expand Down Expand Up @@ -4737,14 +4749,17 @@ def partition_by(

def shift(self: DF, periods: int) -> DF:
"""
Shift the values by a given period and fill the parts that will be empty due to
this operation with `Nones`.
Shift values by the given period.
Parameters
----------
periods
Number of places to shift (may be negative).
See Also
--------
shift_and_fill
Examples
--------
>>> df = pl.DataFrame(
Expand Down Expand Up @@ -4786,8 +4801,10 @@ def shift(self: DF, periods: int) -> DF:

def shift_and_fill(self, periods: int, fill_value: int | str | float) -> DataFrame:
"""
Shift the values by a given period and fill the parts that will be empty due to
this operation with the result of the `fill_value` expression.
Shift values by the given period and fill resulting null values.
Fill the parts that will be empty due to this operation with the result of the
`fill_value` expression.
Parameters
----------
Expand Down Expand Up @@ -5382,7 +5399,7 @@ def median(self: DF) -> DF:

def product(self) -> DataFrame:
"""
Aggregate the columns of this DataFrame to their product values
Aggregate the columns of this DataFrame to their product values.
Examples
--------
Expand Down Expand Up @@ -5636,9 +5653,11 @@ def fold(
self, operation: Callable[[pli.Series, pli.Series], pli.Series]
) -> pli.Series:
"""
Apply a horizontal reduction on a DataFrame. This can be used to effectively
determine aggregations on a row level, and can be applied to any DataType that
can be supercasted (casted to a similar parent type).
Apply a horizontal reduction on a DataFrame.
This can be used to effectively determine aggregations on a row level, and can
be applied to any DataType that can be supercasted (casted to a similar parent
type).
An example of the supercast rules when applying an arithmetic operation on two
DataTypes are for instance:
Expand Down Expand Up @@ -5782,8 +5801,9 @@ def shrink_to_fit(self: DF, in_place: bool) -> DF | None:

def shrink_to_fit(self: DF, in_place: bool = False) -> DF | None:
"""
Shrink memory usage of this DataFrame to fit the exact capacity needed to hold
the data.
Shrink DataFrame memory usage.
Shrinks to fit the exact capacity needed to hold the data.
"""
if in_place:
self._df.shrink_to_fit()
Expand Down Expand Up @@ -5897,7 +5917,7 @@ def interpolate(self) -> DataFrame:

def is_empty(self) -> bool:
"""
Check if the dataframe is empty
Check if the dataframe is empty.
Examples
--------
Expand All @@ -5912,7 +5932,7 @@ def is_empty(self) -> bool:

def to_struct(self, name: str) -> pli.Series:
"""
Convert a ``DataFrame`` to a ``Series`` of type ``Struct``
Convert a ``DataFrame`` to a ``Series`` of type ``Struct``.
Parameters
----------
Expand Down
41 changes: 27 additions & 14 deletions py-polars/polars/internals/dataframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,15 @@ def _select_all(self) -> GBSelection[DF]:

def _groups(self) -> DF: # pragma: no cover
"""
Return a `DataFrame` with:
Get keys and group indices for each group in the groupby.
* the groupby keys
* the group indexes aggregated as lists
Returns
-------
DataFrame
A DataFrame with:
- the groupby keys
- the group indexes aggregated as lists
Examples
--------
Expand Down Expand Up @@ -249,8 +254,9 @@ def apply(self, f: Callable[[pli.DataFrame], pli.DataFrame]) -> DF:

def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:
"""
Use multiple aggregations on columns. This can be combined with complete lazy
API and is considered idiomatic polars.
Use multiple aggregations on columns.
This can be combined with complete lazy API and is considered idiomatic polars.
Parameters
----------
Expand Down Expand Up @@ -425,12 +431,10 @@ def pivot(
self, pivot_column: str | list[str], values_column: str | list[str]
) -> PivotOps[DF]:
"""
Do a pivot operation based on the group key, a pivot column and an aggregation
function on the values column.
Do a pivot operation.
.. note::
Polars'/arrow memory is not ideal for transposing operations like pivots.
If you have a relatively large table, consider using a groupby over a pivot.
The pivot operation is based on the group key, a pivot column and an aggregation
function on the values column.
Parameters
----------
Expand All @@ -439,6 +443,11 @@ def pivot(
values_column
Column that will be aggregated.
Notes
-----
Polars'/arrow memory is not ideal for transposing operations like pivots.
If you have a relatively large table, consider using a groupby over a pivot.
Examples
--------
>>> df = pl.DataFrame(
Expand Down Expand Up @@ -814,8 +823,10 @@ def agg_list(self) -> pli.DataFrame:

class RollingGroupBy(Generic[DF]):
"""
A rolling grouper. This has an `.agg` method which will allow you to run all polars
expressions in a groupby context.
A rolling grouper.
This has an `.agg` method which will allow you to run all polars expressions in a
groupby context.
"""

def __init__(
Expand Down Expand Up @@ -847,8 +858,10 @@ def agg(self, aggs: pli.Expr | Sequence[pli.Expr]) -> pli.DataFrame:

class DynamicGroupBy(Generic[DF]):
"""
A dynamic grouper. This has an `.agg` method which will allow you to run all polars
expressions in a groupby context.
A dynamic grouper.
This has an `.agg` method which will allow you to run all polars expressions in a
groupby context.
"""

def __init__(
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ def nanosecond(self) -> pli.Expr:

def epoch(self, tu: EpochTimeUnit = "us") -> pli.Expr:
"""
Get the time passed since the Unix EPOCH in the give time unit
Get the time passed since the Unix EPOCH in the given time unit.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def field(self, name: str) -> pli.Expr:

def rename_fields(self, names: list[str]) -> pli.Expr:
"""
Rename the fields of the struct
Rename the fields of the struct.
Parameters
----------
Expand Down
3 changes: 1 addition & 2 deletions py-polars/tests/run_doc_examples.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""
Run all doctest examples inside the `polars` module using Python's built-in doctest
module.
Run all doctest examples of the `polars` module using Python's built-in doctest module.
How to check examples: run this script, if exits with code 0, all is good. Otherwise,
the errors will be reported.
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

@pytest.fixture()
def environ() -> Iterator[None]:
"""Fixture to restore the environment variables after the test"""
"""Fixture to restore the environment variables after the test."""
old_environ = dict(os.environ)
yield
os.environ.clear()
Expand Down
6 changes: 2 additions & 4 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -2095,10 +2095,8 @@ def test_indexing_set() -> None:


def test_set() -> None:
"""
Setting a dataframe using indices is deprecated. We keep these tests because we
only generate a warning
"""
# Setting a dataframe using indices is deprecated.
# We keep these tests because we only generate a warning.
np.random.seed(1)
df = pl.DataFrame(
{"foo": np.random.rand(10), "bar": np.arange(10), "ham": ["h"] * 10}
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,7 +1954,7 @@ def test_repr() -> None:
assert str(n) in s_repr

class XSeries(pl.Series):
"""Custom Series class"""
"""Custom Series class."""

# check custom class name reflected in repr output
x = XSeries("ints", [1001, 2002, 3003])
Expand Down

0 comments on commit 59e61db

Please sign in to comment.