In [None]:
import polars as pl

In [None]:
# Sample data: the word for "pear" in four languages. The "fruit" column is
# the input for the string-matching examples that follow.
df = pl.DataFrame(
    {
        "language": ["English", "Dutch", "Portuguese", "Finnish"],
        "fruit": ["pear", "peer", "pêra", "päärynä"],
    }
)
print(df)

In [None]:
# Pattern checks on the "fruit" column. Every check produces one boolean
# column, named after the test it performs.
fruit = pl.col("fruit")
result = df.select(
    fruit,
    # Literal prefix test.
    fruit.str.starts_with("p").alias("starts_with_p"),
    # Regex: "p", any two characters, then "r".
    fruit.str.contains("p..r").alias("p..r"),
    # Regex: one or more "e".
    fruit.str.contains("e+").alias("e+"),
    # Literal suffix test.
    fruit.str.ends_with("r").alias("ends_with_r"),
)
print(result)

In [None]:
# Extracting a pattern: capture the value of the "candidate" query parameter
# from each URL.
df = pl.DataFrame(
    {
        "urls": [
            "http://vote.com/ballon_dor?candidate=messi&ref=polars",
            # This URL spells the key "candidat", so the pattern below does
            # not match it and the extracted value for this row is null.
            "http://vote.com/ballon_dor?candidat=jorginho&ref=polars",
            "http://vote.com/ballon_dor?candidate=ronaldo&ref=polars",
        ]
    }
)
# group_index=1 selects the first capture group, i.e. the (\w+) part.
candidate = pl.col("urls").str.extract(r"candidate=(\w+)", group_index=1)
result = df.select(candidate)
print(result)

In [None]:
# Replacing a pattern: first match only versus every match.
df = pl.DataFrame({"text": ["123abc", "abc456"]})
result = df.with_columns(
    # No alias is given, so this overwrites "text"; only the first digit in
    # each string is replaced by "-".
    pl.col("text").str.replace(r"\d", "-"),
    # Every digit is replaced by "-"; the result goes into a new column.
    text_replace_all=pl.col("text").str.replace_all(r"\d", "-"),
)
print(result)

In [None]:
# Case conversion: show the same column in its original, title, lower and
# upper case forms.
addresses = pl.DataFrame(
    {
        "addresses": [
            "128 PERF st",
            "Rust blVD, 158",
            "PoLaRs Av, 12",
            "1042 Query sq",
        ]
    }
)

address_col = pl.col("addresses")
# The frame is rebound to the converted result: the untouched input is kept
# as "originals", while the title-cased version keeps the name "addresses".
addresses = addresses.select(
    address_col.alias("originals"),
    address_col.str.to_titlecase(),
    lower=address_col.str.to_lowercase(),
    upper=address_col.str.to_uppercase(),
)
print(addresses)

In [None]:
# Stripping strings. This cell reads the "addresses" column of the
# `addresses` frame produced by the case-conversion cell, so it operates on
# the title-cased values.
addr = pl.col("addresses")
# Characters to strip, treated as a set: comma, space and the ten digits.
chars = ", 0123456789"
result = addresses.select(
    # Strip the character set from both ends, from the end only, and from
    # the start only.
    strip=addr.str.strip_chars(chars),
    end=addr.str.strip_chars_end(chars),
    start=addr.str.strip_chars_start(chars),
    # Remove an exact leading / trailing substring where it is present.
    prefix=addr.str.strip_prefix("128 "),
    suffix=addr.str.strip_suffix(", 158"),
)
print(result)

In [None]:
# Slicing strings, with the per-row argument taken from column "n" instead
# of a fixed integer.
df = pl.DataFrame(
    {
        "fruits": ["pear", "mango", "dragonfruit", "passionfruit"],
        "n": [1, -1, 4, -4],
    }
)

fruits = pl.col("fruits")
count = pl.col("n")
result = df.with_columns(
    # "n" is the start offset for slice ...
    slice=fruits.str.slice(count),
    # ... and the character count for head / tail.
    head=fruits.str.head(count),
    tail=fruits.str.tail(count),
)
print(result)