Skip to content

Commit

Permalink
docs: add mwe and internal links (#4019)
Browse files Browse the repository at this point in the history
  • Loading branch information
thatlittleboy committed Jul 15, 2022
1 parent a86faf6 commit 6812c87
Show file tree
Hide file tree
Showing 6 changed files with 251 additions and 89 deletions.
48 changes: 45 additions & 3 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5620,22 +5620,44 @@ def zfill(self, alignment: int) -> Expr:

def ljust(self, width: int, fillchar: str = " ") -> Expr:
"""
Return the string left justified in a string of length width.
Return the string left justified in a string of length ``width``.
Padding is done using the specified ``fillchar``.
The original string is returned if width is less than or equal to ``len(s)``.
The original string is returned if ``width`` is less than or equal to ``len(s)``.
Parameters
----------
width
Justify left to this length.
fillchar
Fill with this ASCII character.
Examples
--------
>>> df = pl.DataFrame({"a": ["cow", "monkey", None, "hippopotamus"]})
>>> df.select(pl.col("a").str.ljust(8, "*"))
shape: (4, 1)
┌──────────────┐
│ a │
│ --- │
│ str │
╞══════════════╡
│ cow***** │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ monkey** │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ null │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hippopotamus │
└──────────────┘
"""
return wrap_expr(self._pyexpr.str_ljust(width, fillchar))

def rjust(self, width: int, fillchar: str = " ") -> Expr:
"""
Return the string right justified in a string of length width.
Return the string right justified in a string of length ``width``.
Padding is done using the specified ``fillchar``.
The original string is returned if ``width`` is less than or equal to ``len(s)``.
Expand All @@ -5645,6 +5667,26 @@ def rjust(self, width: int, fillchar: str = " ") -> Expr:
Justify right to this length.
fillchar
Fill with this ASCII character.
Examples
--------
>>> df = pl.DataFrame({"a": ["cow", "monkey", None, "hippopotamus"]})
>>> df.select(pl.col("a").str.rjust(8, "*"))
shape: (4, 1)
┌──────────────┐
│ a │
│ --- │
│ str │
╞══════════════╡
│ *****cow │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ **monkey │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ null │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ hippopotamus │
└──────────────┘
"""
return wrap_expr(self._pyexpr.str_rjust(width, fillchar))

Expand Down
56 changes: 43 additions & 13 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4125,13 +4125,41 @@ def select_at_idx(self, idx: int) -> pli.Series:
def cleared(self: DF) -> DF:
"""
Create an empty copy of the current DataFrame, with identical schema but no data.
See Also
--------
clone : Cheap deepcopy/clone.
Examples
--------
>>> df = pl.DataFrame(
... {
... "a": [None, 2, 3, 4],
... "b": [0.5, None, 2.5, 13],
... "c": [True, True, False, None],
... }
... )
>>> df.cleared()
shape: (0, 3)
┌─────┬─────┬──────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ bool │
╞═════╪═════╪══════╡
└─────┴─────┴──────┘
"""
return self.head(0) if len(self) > 0 else self.clone()

def clone(self: DF) -> DF:
"""
Cheap deepcopy/clone.
See Also
--------
cleared : Create an empty copy of the current DataFrame, with identical
schema but no data.
Examples
--------
>>> df = pl.DataFrame(
Expand Down Expand Up @@ -5712,36 +5740,38 @@ def hash_rows(
) -> pli.Series:
"""
Hash and combine the rows in this DataFrame.
Hash value is UInt64.
The hash value is of type `UInt64`.
Parameters
----------
k0
seed parameter
Seed parameter.
k1
seed parameter
Seed parameter.
k2
seed parameter
Seed parameter.
k3
seed parameter
Seed parameter.
Examples
--------
>>> df = pl.DataFrame(
... {
... "foo": [1, 2, 3],
... "bar": [6, 7, 8],
... "ham": ["a", "b", "c"],
... "foo": [1, None, 3, 4],
... "ham": ["a", "b", None, "d"],
... }
... )
>>> df.hash(k0=42) # doctest: +SKIP
shape: (3,)
>>> df.hash_rows(k0=42)
shape: (4,)
Series: '' [u64]
[
1208206736888326229
8040480609798856146
18282897888575762835
13491910696687648691
5223969663565791681
4754614259239603444
162820313037838626
]
"""
return pli.wrap_s(self._df.hash_rows(k0, k1, k2, k3))

Expand Down
67 changes: 30 additions & 37 deletions py-polars/polars/internals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,19 +179,19 @@ def date_range(
high
Upper bound of the date range.
interval
Interval periods
A python timedelta object or a polars duration `str`
e.g.: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
closed {None, 'left', 'right', 'both', 'none'}
Interval periods. It can be a Python ``timedelta`` object, like ``timedelta(days=10)``,
or a polars duration string, such as ``3d12h4m25s`` representing 3 days, 12 hours,
4 minutes, and 25 seconds.
closed : {None, 'left', 'right', 'both', 'none'}
Make the interval closed to the 'left', 'right', 'none' or 'both' sides.
name
Name of the output Series.
time_unit
Set the time unit; one of {'ns', 'us', 'ms'}.
time_unit : {'ns', 'us', 'ms'}
Set the time unit.
Notes
-----
If both `low` and `high` are passed as date types (not datetime), and the
If both ``low`` and ``high`` are passed as date types (not datetime), and the
interval granularity is no finer than 1d, the returned range is also of
type date. All other permutations return a datetime Series.
Expand All @@ -201,10 +201,29 @@ def date_range(
Examples
--------
>>> from datetime import datetime, date
>>> pl.date_range(datetime(1985, 1, 1), datetime(2015, 7, 1), "1d12h")
shape: (7426,)
Series: '' [datetime[ns]]
Using polars duration string to specify the interval:
>>> from datetime import date
>>> pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", name="drange")
shape: (3,)
Series: 'drange' [date]
[
2022-01-01
2022-02-01
2022-03-01
]
Using `timedelta` object to specify the interval:
>>> from datetime import datetime, timedelta
>>> pl.date_range(
... datetime(1985, 1, 1),
... datetime(1985, 1, 10),
... timedelta(days=1, hours=12),
... time_unit="ms",
... )
shape: (7,)
Series: '' [datetime[ms]]
[
1985-01-01 00:00:00
1985-01-02 12:00:00
Expand All @@ -213,34 +232,8 @@ def date_range(
1985-01-07 00:00:00
1985-01-08 12:00:00
1985-01-10 00:00:00
1985-01-11 12:00:00
1985-01-13 00:00:00
1985-01-14 12:00:00
1985-01-16 00:00:00
1985-01-17 12:00:00
...
2015-06-14 00:00:00
2015-06-15 12:00:00
2015-06-17 00:00:00
2015-06-18 12:00:00
2015-06-20 00:00:00
2015-06-21 12:00:00
2015-06-23 00:00:00
2015-06-24 12:00:00
2015-06-26 00:00:00
2015-06-27 12:00:00
2015-06-29 00:00:00
2015-06-30 12:00:00
]
>>> pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", name="drange")
shape: (3,)
Series: 'drange' [date]
[
2022-01-01
2022-02-01
2022-03-01
]
"""
if isinstance(interval, timedelta):
interval = _timedelta_to_pl_duration(interval)
Expand Down
50 changes: 41 additions & 9 deletions py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ def collect(
"""
Collect into a DataFrame.
Note: use `fetch` if you want to run this query on the first `n` rows only.
Note: use :func:`fetch` if you want to run your query on the first `n` rows only.
This can be a huge time saver in debugging queries.
Parameters
Expand Down Expand Up @@ -642,8 +642,9 @@ def fetch(
slice_pushdown: bool = True,
) -> DF:
"""
Fetch is like a collect operation, but it overwrites the number of rows read by every scan
operation. This is a utility that helps debug a query on a smaller number of rows.
Fetch is like a :func:`collect` operation, but it overwrites the number of rows read
by every scan operation. This is a utility that helps debug a query on a smaller number
of rows.
Note that the fetch does not guarantee the final number of rows in the DataFrame.
Filter, join operations and a lower number of rows available in the scanned file influence
Expand Down Expand Up @@ -767,12 +768,40 @@ def cache(self: LDF) -> LDF:
def cleared(self: LDF) -> LDF:
"""
Create an empty copy of the current LazyFrame, with identical schema but no data.
See Also
--------
clone : Cheap deepcopy/clone.
Examples
--------
>>> df = pl.DataFrame(
... {
... "a": [None, 2, 3, 4],
... "b": [0.5, None, 2.5, 13],
... "c": [True, True, False, None],
... }
... ).lazy()
>>> df.cleared().fetch()
shape: (0, 3)
┌─────┬─────┬──────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ bool │
╞═════╪═════╪══════╡
└─────┴─────┴──────┘
"""
return self._dataframe_class(columns=self.schema).lazy()

def clone(self: LDF) -> LDF:
"""
Very cheap deepcopy/clone.
See Also
--------
cleared : Create an empty copy of the current LazyFrame, with identical
schema but no data.
"""
return self._from_pyldf(self._ldf.clone())

Expand Down Expand Up @@ -1774,8 +1803,10 @@ def slice(self: LDF, offset: int, length: int | None = None) -> LDF:

def limit(self: LDF, n: int = 5) -> LDF:
"""
Limit the DataFrame to the first `n` rows. Note if you don't want the rows to be scanned,
use the `fetch` operation.
Limit the LazyFrame to the first `n` rows.
Note if you don't want the rows to be scanned, use the :func:`fetch` operation
instead.
Parameters
----------
Expand All @@ -1786,12 +1817,13 @@ def limit(self: LDF, n: int = 5) -> LDF:

def head(self: LDF, n: int = 5) -> LDF:
"""
Gets the first `n` rows of the DataFrame. You probably don't want to use this!
Get the first `n` rows of the LazyFrame.
Consider using the `fetch` operation. The `fetch` operation will truly load the first `n`
rows lazily.
You probably don't want to use this!
Consider using the :func:`fetch` operation instead. The :func:`fetch` operation will truly
load the first `n` rows lazily.
This operation instead loads all the rows and only applies the `head` at the end.
This operation instead loads all the rows and only applies the ``head`` at the end.
Parameters
----------
Expand Down

0 comments on commit 6812c87

Please sign in to comment.