Skip to content

Commit

Permalink
Add useful example for pl.StringCache(). (#3677)
Browse files Browse the repository at this point in the history
  • Loading branch information
ghuls committed Jun 12, 2022
1 parent eaf3958 commit 716f97d
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 9 deletions.
2 changes: 2 additions & 0 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3583,6 +3583,8 @@ def join(
The direction is backward
The keys must be sorted to perform an asof join
**Joining on columns with categorical data**
See pl.StringCache().
"""
if how == "asof": # pragma: no cover
warnings.warn(
Expand Down
41 changes: 32 additions & 9 deletions py-polars/polars/string_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,43 @@ class StringCache:
Context manager that allows data sources to share the same categorical features.
This will temporarily cache the string categories until the context manager is finished.
>>> df = pl.DataFrame(
... {
... "a_col": ["red", "green", "blue"],
... "b_col": ["yellow", "orange", "black"],
... }
... )
>>> with pl.StringCache():
... df = df.with_columns(
... df1 = pl.DataFrame(
... [
... pl.col("a_col").cast(pl.Categorical).alias("a_col"),
... pl.col("b_col").cast(pl.Categorical).alias("b_col"),
... pl.Series(
... "color", ["red", "green", "blue", "orange"], pl.Categorical
... ),
... pl.Series("uint8", [1, 2, 3, 4], pl.UInt8),
... ]
... )
... df2 = pl.DataFrame(
... [
... pl.Series(
... "color",
... ["yellow", "green", "orange", "black", "red"],
... pl.Categorical,
... ),
... pl.Series("char", ["a", "b", "c", "d", "e"], pl.Utf8),
... ]
... )
...
... # Both dataframes use the same string cache for the categorical column,
... # so the join operation on that column will succeed.
... df_join = df1.join(df2, how="inner", on="color")
...
>>> df_join
shape: (3, 3)
┌────────┬───────┬──────┐
│ color ┆ uint8 ┆ char │
│ --- ┆ --- ┆ --- │
│ cat ┆ u8 ┆ str │
╞════════╪═══════╪══════╡
│ green ┆ 2 ┆ b │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ orange ┆ 4 ┆ c │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
│ red ┆ 1 ┆ e │
└────────┴───────┴──────┘
"""

def __init__(self) -> None:
Expand Down

0 comments on commit 716f97d

Please sign in to comment.