Skip to content

Commit

Permalink
docs examples (#3532)
Browse files Browse the repository at this point in the history
  • Loading branch information
moritzwilksch committed May 30, 2022
1 parent bdd96e0 commit fce5bcd
Showing 1 changed file with 214 additions and 1 deletion.
215 changes: 214 additions & 1 deletion py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1900,6 +1900,27 @@ def insert_at_idx(self, index: int, series: "pli.Series") -> None:
Column to insert the new `Series` column.
series
`Series` to insert.
Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
>>> s = pl.Series("baz", [97, 98, 99])
>>> df.insert_at_idx(1, s) # returns None
>>> df
shape: (3, 3)
┌─────┬─────┬─────┐
│ foo ┆ baz ┆ bar │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1 ┆ 97 ┆ 4 │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 2 ┆ 98 ┆ 5 │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 3 ┆ 99 ┆ 6 │
└─────┴─────┴─────┘
"""
self._df.insert_at_idx(index, series._s)

Expand Down Expand Up @@ -3377,6 +3398,51 @@ def join_asof(
Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.
force_parallel
Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.
Examples
--------
>>> from datetime import datetime
>>> gdp = pl.DataFrame(
... {
... "date": [
... datetime(2016, 1, 1),
... datetime(2017, 1, 1),
... datetime(2018, 1, 1),
... datetime(2019, 1, 1),
... ], # note record date: Jan 1st (sorted!)
... "gdp": [4164, 4411, 4566, 4696],
... }
... )
>>> population = pl.DataFrame(
... {
... "date": [
... datetime(2016, 5, 12),
... datetime(2017, 5, 12),
... datetime(2018, 5, 12),
... datetime(2019, 5, 12),
... ], # note record date: May 12th (sorted!)
... "population": [82.19, 82.66, 83.12, 83.52],
... }
... )
>>> population.join_asof(
... gdp, left_on="date", right_on="date", strategy="backward"
... )
shape: (4, 3)
┌─────────────────────┬────────────┬──────┐
│ date ┆ population ┆ gdp │
│ --- ┆ --- ┆ --- │
│ datetime[μs] ┆ f64 ┆ i64 │
╞═════════════════════╪════════════╪══════╡
│ 2016-05-12 00:00:00 ┆ 82.19 ┆ 4164 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2017-05-12 00:00:00 ┆ 82.66 ┆ 4411 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2018-05-12 00:00:00 ┆ 83.12 ┆ 4566 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2019-05-12 00:00:00 ┆ 83.52 ┆ 4696 │
└─────────────────────┴────────────┴──────┘
"""
return (
self.lazy()
Expand Down Expand Up @@ -3571,6 +3637,39 @@ def apply(
Only used in the case when the custom function returns rows.
This uses the first `n` rows to determine the output schema
Examples
--------
>>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [-1, 5, 8]})
# return rows
>>> df.apply(lambda t: (t[0] * 2, t[1] * 3))
shape: (3, 2)
┌──────────┬──────────┐
│ column_0 ┆ column_1 │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞══════════╪══════════╡
│ 2 ┆ -3 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 4 ┆ 15 │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
│ 6 ┆ 24 │
└──────────┴──────────┘
# return scalar
>>> df.apply(lambda t: (t[0] * 2 + t[1]))
shape: (3, 1)
┌───────┐
│ apply │
│ --- │
│ i64 │
╞═══════╡
│ 1 │
├╌╌╌╌╌╌╌┤
│ 9 │
├╌╌╌╌╌╌╌┤
│ 14 │
└───────┘
"""
out, is_df = self._df.apply(f, return_dtype, inference_size)
if is_df:
Expand Down Expand Up @@ -3765,6 +3864,33 @@ def extend(self, other: "DataFrame") -> None:
----------
other
DataFrame to vertically add.
Examples
--------
>>> df1 = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
>>> df2 = pl.DataFrame({"foo": [10, 20, 30], "bar": [40, 50, 60]})
>>> df1.extend(df2) # returns None
>>> df1
shape: (6, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 4 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2 ┆ 5 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 3 ┆ 6 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 10 ┆ 40 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 20 ┆ 50 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 30 ┆ 60 │
└─────┴─────┘
"""
self._df.extend(other._df)

Expand Down Expand Up @@ -3888,19 +4014,57 @@ def __deepcopy__(self: DF, memodict={}) -> DF: # type: ignore
def get_columns(self) -> List["pli.Series"]:
    """
    Get the DataFrame as a List of Series.

    Returns
    -------
    A list with one wrapped `Series` for every entry returned by the
    underlying frame's `get_columns()`.

    Examples
    --------
    >>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
    >>> df.get_columns()
    [shape: (3,)
    Series: 'foo' [i64]
    [
            1
            2
            3
    ], shape: (3,)
    Series: 'bar' [i64]
    [
            4
            5
            6
    ]]

    """
    # A comprehension replaces `list(map(lambda s: pli.wrap_s(s), ...))`:
    # the lambda only forwarded its argument to `pli.wrap_s`.
    return [pli.wrap_s(s) for s in self._df.get_columns()]

def get_column(self, name: str) -> "pli.Series":
    """
    Look up one column by its name and return it as a Series.

    Parameters
    ----------
    name
        Name of the column to retrieve.

    Returns
    -------
    The result of indexing this DataFrame with `name`.

    Examples
    --------
    >>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
    >>> df.get_column("foo")
    shape: (3,)
    Series: 'foo' [i64]
    [
            1
            2
            3
    ]

    """
    # Delegates to the frame's own item access.
    column = self[name]
    return column

def fill_null(self: DF, strategy: Union[str, "pli.Expr", Any]) -> DF:
"""
Fill null values using a filling strategy, literal, or Expr.
.. seealso::
fill_nan
Parameters
----------
strategy
Expand Down Expand Up @@ -3928,9 +4092,13 @@ def fill_nan(self: DF, fill_value: Union["pli.Expr", int, float]) -> DF:
"""
Fill floating point NaN values by an Expression evaluation.
.. seealso::
fill_null
Warnings
--------
NOTE that floating point NaN (No a Number) are not missing values!
NOTE that floating point NaNs (Not a Number) are not missing values!
To replace missing values, use `fill_null`.
Parameters
Expand Down Expand Up @@ -4806,6 +4974,21 @@ def median(self: DF) -> DF:
def product(self: DF) -> DF:
    """
    Aggregate the columns of this DataFrame to their product values.

    Returns
    -------
    The result of selecting `pli.all().product()` on this DataFrame.

    Examples
    --------
    >>> df = pl.DataFrame({"factors": [1.0, 0.99, 1.3, 0.87]})
    >>> df.product()
    shape: (1, 1)
    ┌─────────┐
    │ factors │
    │ ---     │
    │ f64     │
    ╞═════════╡
    │ 1.1196  │
    └─────────┘

    """
    # Build the "product of every column" expression, then evaluate it.
    product_of_all = pli.all().product()
    return self.select(product_of_all)

Expand Down Expand Up @@ -4893,6 +5076,9 @@ def unique(
) -> DF:
"""
Drop duplicate rows from this DataFrame.
Warnings
--------
Note that this fails if there is a column of type `List` in the DataFrame or subset.
Parameters
Expand All @@ -4907,6 +5093,25 @@ def unique(
Returns
-------
DataFrame with unique rows
Examples
--------
>>> df = pl.DataFrame({"x": [1, 2, 3, 2, 1], "y": [3, 2, 1, 2, 3]})
>>> df.unique()
shape: (3, 2)
┌─────┬─────┐
│ x ┆ y │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 3 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2 ┆ 2 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 3 ┆ 1 │
└─────┴─────┘
"""
if subset is not None and not isinstance(subset, list):
subset = [subset]
Expand Down Expand Up @@ -5239,6 +5444,14 @@ def interpolate(self: DF) -> DF:
def is_empty(self) -> bool:
    """
    Check whether the DataFrame has no rows.

    Returns
    -------
    `True` when `self.height` equals 0, `False` otherwise.

    Examples
    --------
    >>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
    >>> df.filter(pl.col("foo") > 99).is_empty()
    True

    """
    row_count = self.height
    return row_count == 0

Expand Down

0 comments on commit fce5bcd

Please sign in to comment.