/
categorical.py
63 lines (51 loc) · 1.88 KB
/
categorical.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from __future__ import annotations
from typing import TYPE_CHECKING
from polars._utils.deprecation import deprecate_function
from polars._utils.wrap import wrap_expr
if TYPE_CHECKING:
from polars import Expr
from polars.type_aliases import CategoricalOrdering
class ExprCatNameSpace:
    """Expression namespace grouping the categorical-related operations."""

    _accessor = "cat"

    def __init__(self, expr: Expr):
        # Hold on to the wrapped `_pyexpr` object of the parent expression;
        # every method in this namespace delegates to it.
        self._pyexpr = expr._pyexpr

    @deprecate_function(
        "Set the ordering directly on the datatype `pl.Categorical('lexical')`"
        " or `pl.Categorical('physical')` or `cast()` to the intended data type."
        " This method will be removed in the next breaking change",
        version="0.19.19",
    )
    def set_ordering(self, ordering: CategoricalOrdering) -> Expr:
        """
        Choose the ordering used when sorting this categorical column.

        .. deprecated:: 0.19.19
            Set the ordering directly on the data type instead, e.g.
            `pl.Categorical('lexical')` or `pl.Categorical('physical')`,
            or `cast()` to the intended data type.

        Parameters
        ----------
        ordering : {'physical', 'lexical'}
            Ordering type:

            - 'physical' -> Use the physical representation of the categories
              to determine the order (default).
            - 'lexical' -> Use the string values to determine the ordering.

        Returns
        -------
        Expr
            The result of `cat_set_ordering` on the underlying expression,
            passed through `wrap_expr`.
        """
        reordered = self._pyexpr.cat_set_ordering(ordering)
        return wrap_expr(reordered)

    def get_categories(self) -> Expr:
        """
        Return the categories stored in this data type.

        Returns
        -------
        Expr
            The result of `cat_get_categories` on the underlying expression,
            passed through `wrap_expr`.

        Examples
        --------
        >>> df = pl.Series(
        ...     "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical
        ... ).to_frame()
        >>> df.select(pl.col("cats").cat.get_categories())
        shape: (3, 1)
        ┌──────┐
        │ cats │
        │ ---  │
        │ str  │
        ╞══════╡
        │ foo  │
        │ bar  │
        │ ham  │
        └──────┘
        """
        categories = self._pyexpr.cat_get_categories()
        return wrap_expr(categories)