In [1]:
import polars as pl

# Sample OS names shared by the Polars examples. The list order is neither the
# Enum order ("Linux" < "macOS" < "Windows") nor lexical order.
os_data = ["macOS", "Linux", "Windows"]

In [2]:
# Show the installed Polars version this notebook was run with.
print(pl.__version__)

1.33.1


## pl.Enum

In [3]:
# pl.Enum takes its ordering from the category list:
# "Linux" < "macOS" < "Windows"
enum_order = ["Linux", "macOS", "Windows"]
common_os_enum = pl.Enum(enum_order)

# os  : the sample data typed as the Enum
# os2 : `os` shuffled with a fixed seed (still an Enum column)
# os3 : `os2` cast to plain strings
common_os_enum_df = (
    pl.DataFrame({"os": os_data}, schema={"os": common_os_enum})
    .with_columns(os2=pl.col("os").shuffle(seed=42))
    .with_columns(os3=pl.col("os2").cast(pl.String))
)
print(common_os_enum_df)

shape: (3, 3)
┌─────────┬─────────┬─────────┐
│ os      ┆ os2     ┆ os3     │
│ ---     ┆ ---     ┆ ---     │
│ enum    ┆ enum    ┆ str     │
╞═════════╪═════════╪═════════╡
│ macOS   ┆ Windows ┆ Windows │
│ Linux   ┆ Linux   ┆ Linux   │
│ Windows ┆ macOS   ┆ macOS   │
└─────────┴─────────┴─────────┘


### Comparing with string literal

In [4]:
# Keep the rows whose `os` ranks above "macOS" in the Enum order
# ("Linux" < "macOS" < "Windows").
print(
    common_os_enum_df.filter(pl.col("os") > "macOS")
)

shape: (1, 3)
┌─────────┬───────┬───────┐
│ os      ┆ os2   ┆ os3   │
│ ---     ┆ ---   ┆ ---   │
│ enum    ┆ enum  ┆ str   │
╞═════════╪═══════╪═══════╡
│ Windows ┆ macOS ┆ macOS │
└─────────┴───────┴───────┘


In [5]:
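# A bare string passed to `.gt()` is treated as a string literal, not as a
# column name, so the line below tries to cast the value "os2" to the Enum:
# InvalidOperationError: conversion from `str` to `enum` failed in
# column 'literal' for 1 out of 1 values: ["os2"]
# Use `pl.col("os2")` to compare against the column instead.

# common_os_enum_df.with_columns(pl.col("os").gt("os2").alias("os > os2"))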

### Comparing with pl.String

In [6]:
# Enum column vs. String column: the result follows the Enum order
# ("Linux" < "macOS" < "Windows"), not the lexical order of the strings.
print(
    common_os_enum_df.with_columns(
        (pl.col("os") > pl.col("os3")).alias("os > os3")
    )
)

shape: (3, 4)
┌─────────┬─────────┬─────────┬──────────┐
│ os      ┆ os2     ┆ os3     ┆ os > os3 │
│ ---     ┆ ---     ┆ ---     ┆ ---      │
│ enum    ┆ enum    ┆ str     ┆ bool     │
╞═════════╪═════════╪═════════╪══════════╡
│ macOS   ┆ Windows ┆ Windows ┆ false    │
│ Linux   ┆ Linux   ┆ Linux   ┆ false    │
│ Windows ┆ macOS   ┆ macOS   ┆ true     │
└─────────┴─────────┴─────────┴──────────┘


### Comparing with pl.Enum

In [7]:
# Enum column vs. Enum column: compared by the shared Enum order
# ("Linux" < "macOS" < "Windows").
print(
    common_os_enum_df.with_columns(
        (pl.col("os") > pl.col("os2")).alias("os > os2")
    )
)

shape: (3, 4)
┌─────────┬─────────┬─────────┬──────────┐
│ os      ┆ os2     ┆ os3     ┆ os > os2 │
│ ---     ┆ ---     ┆ ---     ┆ ---      │
│ enum    ┆ enum    ┆ str     ┆ bool     │
╞═════════╪═════════╪═════════╪══════════╡
│ macOS   ┆ Windows ┆ Windows ┆ false    │
│ Linux   ┆ Linux   ┆ Linux   ┆ false    │
│ Windows ┆ macOS   ┆ macOS   ┆ true     │
└─────────┴─────────┴─────────┴──────────┘


## pl.Categorical

In [8]:
# Same three columns as the Enum frame, but `os` / `os2` are pl.Categorical:
# os  : the sample data typed as Categorical
# os2 : `os` shuffled with a fixed seed (still Categorical)
# os3 : `os2` cast to plain strings
common_os_cat_df = (
    pl.DataFrame({"os": os_data}, schema={"os": pl.Categorical()})
    .with_columns(os2=pl.col("os").shuffle(seed=42))
    .with_columns(os3=pl.col("os2").cast(pl.String))
)
print(common_os_cat_df)

shape: (3, 3)
┌─────────┬─────────┬─────────┐
│ os      ┆ os2     ┆ os3     │
│ ---     ┆ ---     ┆ ---     │
│ cat     ┆ cat     ┆ str     │
╞═════════╪═════════╪═════════╡
│ macOS   ┆ Windows ┆ Windows │
│ Linux   ┆ Linux   ┆ Linux   │
│ Windows ┆ macOS   ┆ macOS   │
└─────────┴─────────┴─────────┘


### Comparing with string literal

In [9]:
# Categorical values compare lexically here:
# ord("L")=76 < ord("W")=87 < ord("m")=109,
# so "Linux" < "Windows" < "macOS" and only "macOS" is greater than "Windows".
print(
    common_os_cat_df.filter(pl.col("os") > "Windows")
)

shape: (1, 3)
┌───────┬─────────┬─────────┐
│ os    ┆ os2     ┆ os3     │
│ ---   ┆ ---     ┆ ---     │
│ cat   ┆ cat     ┆ str     │
╞═══════╪═════════╪═════════╡
│ macOS ┆ Windows ┆ Windows │
└───────┴─────────┴─────────┘


### Comparing with pl.String

In [10]:
# Categorical column vs. String column: lexical comparison,
# ord("L")=76 < ord("W")=87 < ord("m")=109,
# i.e. "Linux" < "Windows" < "macOS".
print(
    common_os_cat_df.with_columns(
        (pl.col("os") > pl.col("os3")).alias("os > os3")
    )
)

shape: (3, 4)
┌─────────┬─────────┬─────────┬──────────┐
│ os      ┆ os2     ┆ os3     ┆ os > os3 │
│ ---     ┆ ---     ┆ ---     ┆ ---      │
│ cat     ┆ cat     ┆ str     ┆ bool     │
╞═════════╪═════════╪═════════╪══════════╡
│ macOS   ┆ Windows ┆ Windows ┆ true     │
│ Linux   ┆ Linux   ┆ Linux   ┆ false    │
│ Windows ┆ macOS   ┆ macOS   ┆ false    │
└─────────┴─────────┴─────────┴──────────┘


### Comparing with pl.Categorical

In [11]:
# Categorical column vs. Categorical column: still a lexical comparison,
# ord("L")=76 < ord("W")=87 < ord("m")=109,
# i.e. "Linux" < "Windows" < "macOS".
print(
    common_os_cat_df.with_columns(
        (pl.col("os") > pl.col("os2")).alias("os > os2")
    )
)

shape: (3, 4)
┌─────────┬─────────┬─────────┬──────────┐
│ os      ┆ os2     ┆ os3     ┆ os > os2 │
│ ---     ┆ ---     ┆ ---     ┆ ---      │
│ cat     ┆ cat     ┆ str     ┆ bool     │
╞═════════╪═════════╪═════════╪══════════╡
│ macOS   ┆ Windows ┆ Windows ┆ true     │
│ Linux   ┆ Linux   ┆ Linux   ┆ false    │
│ Windows ┆ macOS   ┆ macOS   ┆ false    │
└─────────┴─────────┴─────────┴──────────┘


### pl.Expr.cat

In [12]:
# List the categories of `os` as a String column. The listed order
# (macOS, Linux, Windows) is not the lexical order used by the comparisons above.
print(common_os_cat_df.select(pl.col("os").cat.get_categories()))

shape: (3, 1)
┌─────────┐
│ os      │
│ ---     │
│ str     │
╞═════════╡
│ macOS   │
│ Linux   │
│ Windows │
└─────────┘


## pandas

In [13]:
import pandas as pd

In [14]:
# Category order (and, for the ordered dtype, the comparison order):
# "Linux" < "macOS" < "Windows"
os_data_pd = ["Linux", "macOS", "Windows"]
os_cat_non_ordered = pd.CategoricalDtype(categories=os_data_pd)
os_cat_ordered = pd.CategoricalDtype(categories=os_data_pd, ordered=True)

# Pass the dtype straight to Series.astype. A {name: dtype} mapping is meant
# for DataFrame.astype; on a Series it only works when the key equals the
# Series name, which makes the call needlessly fragile.
df_pd = pd.DataFrame({"os": os_data_pd}).assign(
    os_cat_non_ordered=lambda df_: df_["os"].astype(os_cat_non_ordered),
    os_cat_ordered=lambda df_: df_["os"].astype(os_cat_ordered),
)
print(df_pd)

        os os_cat_non_ordered os_cat_ordered
0    Linux              Linux          Linux
1    macOS              macOS          macOS
2  Windows            Windows        Windows


In [15]:
# Ordering comparisons on the unordered categorical column raise:
# TypeError: Unordered Categoricals can only compare equality or not

# df_pd.query("os_cat_non_ordered > 'macOS'")

In [16]:
# With ordered=True the categorical compares by category order
# ("Linux" < "macOS" < "Windows"), so only "Windows" is greater than "macOS".
print(df_pd.query("os_cat_ordered > 'macOS'"))

        os os_cat_non_ordered os_cat_ordered
2  Windows            Windows        Windows


## Remarks

### Remark1

In [17]:
# ord("A")=65, ord("L")=76, ord("W")=87, ord("m")=109, so lexically
# "A" < "Linux" < "Windows" < "macOS": every value compares greater than "A",
# even though "A" is not one of the values in the column.
df0 = pl.DataFrame({"os": os_data}, schema={"os": pl.Categorical()})
print(
    df0.with_columns((pl.col("os") > "A").alias("> A"))
)

shape: (3, 2)
┌─────────┬──────┐
│ os      ┆ > A  │
│ ---     ┆ ---  │
│ cat     ┆ bool │
╞═════════╪══════╡
│ macOS   ┆ true │
│ Linux   ┆ true │
│ Windows ┆ true │
└─────────┴──────┘
