In [1]:
import polars as pl

## pl.Array

In [2]:
# "numbers" is typed as a fixed-width pl.Array: every row holds exactly three
# u64 values.
number_rows = [[5, 15, 3], [11, 14, 6], [13, 18, 12]]
df1 = pl.DataFrame(
    data={"id": [1, 2, 3], "numbers": number_rows},
    schema_overrides={
        "id": pl.UInt64,
        "numbers": pl.Array(pl.UInt64, 3),
    },
)
print(df1)

shape: (3, 2)
┌─────┬───────────────┐
│ id  ┆ numbers       │
│ --- ┆ ---           │
│ u64 ┆ array[u64, 3] │
╞═════╪═══════════════╡
│ 1   ┆ [5, 15, 3]    │
│ 2   ┆ [11, 14, 6]   │
│ 3   ┆ [13, 18, 12]  │
└─────┴───────────────┘


In [3]:
# Element access on the fixed-width array column through the .arr namespace:
# first element, last element, and the element at index 1.
numbers_col = pl.col("numbers")
print(
    df1.with_columns(
        first=numbers_col.arr.first(),
        last=numbers_col.arr.last(),
        get_1=numbers_col.arr.get(1),
    )
)

shape: (3, 5)
┌─────┬───────────────┬───────┬──────┬───────┐
│ id  ┆ numbers       ┆ first ┆ last ┆ get_1 │
│ --- ┆ ---           ┆ ---   ┆ ---  ┆ ---   │
│ u64 ┆ array[u64, 3] ┆ u64   ┆ u64  ┆ u64   │
╞═════╪═══════════════╪═══════╪══════╪═══════╡
│ 1   ┆ [5, 15, 3]    ┆ 5     ┆ 3    ┆ 15    │
│ 2   ┆ [11, 14, 6]   ┆ 11    ┆ 6    ┆ 14    │
│ 3   ┆ [13, 18, 12]  ┆ 13    ┆ 12   ┆ 18    │
└─────┴───────────────┴───────┴──────┴───────┘


## pl.List

In [4]:
# Raw input: space-separated numbers stored as plain strings. The second row
# carries a literal "None" token where a number would be.
raw_numbers = ["5 15 1", "None 14 6", "13 18 19"]
df2 = pl.DataFrame(
    data={"id": [1, 2, 3], "numbers": raw_numbers},
    schema_overrides={"id": pl.UInt64},
)
print(df2)

shape: (3, 2)
┌─────┬───────────┐
│ id  ┆ numbers   │
│ --- ┆ ---       │
│ u64 ┆ str       │
╞═════╪═══════════╡
│ 1   ┆ 5 15 1    │
│ 2   ┆ None 14 6 │
│ 3   ┆ 13 18 19  │
└─────┴───────────┘


In [5]:
# Split each string on spaces, then cast every piece to u64. With
# strict=False a piece that is not a number (the "None" token) becomes null
# instead of raising.
parsed_numbers = (
    pl.col("numbers")
    .str.split(" ")
    .list.eval(pl.element().cast(pl.UInt64, strict=False))
)
print(df2.with_columns(list=parsed_numbers))

shape: (3, 3)
┌─────┬───────────┬───────────────┐
│ id  ┆ numbers   ┆ list          │
│ --- ┆ ---       ┆ ---           │
│ u64 ┆ str       ┆ list[u64]     │
╞═════╪═══════════╪═══════════════╡
│ 1   ┆ 5 15 1    ┆ [5, 15, 1]    │
│ 2   ┆ None 14 6 ┆ [null, 14, 6] │
│ 3   ┆ 13 18 19  ┆ [13, 18, 19]  │
└─────┴───────────┴───────────────┘


In [6]:
print(
    df2.with_columns(
        # Parse the space-separated strings into list[u64]; non-numeric
        # pieces become null because the cast is non-strict.
        pl.col("numbers")
        .str.split(" ")
        .list.eval(pl.element().cast(pl.UInt64, strict=False))
        .alias("list")
    ).with_columns(
        pl.col("list")
        # Single pass per row: flag elements that are > 18, < 3, or null and
        # take the positions of the flagged elements directly, instead of
        # materialising an intermediate list[bool] and evaluating it again.
        .list.eval(
            pl.element()
            .gt(18)
            .or_(pl.element().lt(3))
            .or_(pl.element().is_null())
            .arg_true()
        )
        .alias("outlier_indexes")
    )
)

shape: (3, 4)
┌─────┬───────────┬───────────────┬─────────────────┐
│ id  ┆ numbers   ┆ list          ┆ outlier_indexes │
│ --- ┆ ---       ┆ ---           ┆ ---             │
│ u64 ┆ str       ┆ list[u64]     ┆ list[u32]       │
╞═════╪═══════════╪═══════════════╪═════════════════╡
│ 1   ┆ 5 15 1    ┆ [5, 15, 1]    ┆ [2]             │
│ 2   ┆ None 14 6 ┆ [null, 14, 6] ┆ [0]             │
│ 3   ┆ 13 18 19  ┆ [13, 18, 19]  ┆ [2]             │
└─────┴───────────┴───────────────┴─────────────────┘


In [7]:
print(
    df2.with_columns(
        # Parse the space-separated strings into list[u64]; non-numeric
        # pieces become null because the cast is non-strict.
        pl.col("numbers")
        .str.split(" ")
        .list.eval(pl.element().cast(pl.UInt64, strict=False))
        .alias("list")
    )
    .with_columns(
        pl.col("list")
        # Single pass per row: flag elements that are > 18, < 3, or null and
        # take the positions of the flagged elements directly, instead of
        # materialising an intermediate list[bool] and evaluating it again.
        .list.eval(
            pl.element()
            .gt(18)
            .or_(pl.element().lt(3))
            .or_(pl.element().is_null())
            .arg_true()
        )
        .alias("outlier_indexes")
    )
    .with_columns(
        # Look the flagged positions back up in the parsed list to get the
        # outlier values themselves.
        pl.col("list")
        .list.gather(pl.col("outlier_indexes"))
        .alias("outliers")
    )
)

shape: (3, 5)
┌─────┬───────────┬───────────────┬─────────────────┬───────────┐
│ id  ┆ numbers   ┆ list          ┆ outlier_indexes ┆ outliers  │
│ --- ┆ ---       ┆ ---           ┆ ---             ┆ ---       │
│ u64 ┆ str       ┆ list[u64]     ┆ list[u32]       ┆ list[u64] │
╞═════╪═══════════╪═══════════════╪═════════════════╪═══════════╡
│ 1   ┆ 5 15 1    ┆ [5, 15, 1]    ┆ [2]             ┆ [1]       │
│ 2   ┆ None 14 6 ┆ [null, 14, 6] ┆ [0]             ┆ [null]    │
│ 3   ┆ 13 18 19  ┆ [13, 18, 19]  ┆ [2]             ┆ [19]      │
└─────┴───────────┴───────────────┴─────────────────┴───────────┘


## pl.Struct

In [8]:
# A struct column: each row is one record with three named u64 fields.
field_names = ("first", "second", "third")
records = [(5, 15, 15), (5, 14, 6), (13, 18, 5)]
df3 = pl.DataFrame(
    data={"numbers": [dict(zip(field_names, values)) for values in records]},
    schema={"numbers": pl.Struct({name: pl.UInt64 for name in field_names})},
)
print(df3)

shape: (3, 1)
┌───────────┐
│ numbers   │
│ ---       │
│ struct[3] │
╞═══════════╡
│ {5,15,15} │
│ {5,14,6}  │
│ {13,18,5} │
└───────────┘


In [9]:
# Expand every field of the struct into its own top-level column.
unnested = df3.select(pl.col("numbers").struct.unnest())
print(unnested)

shape: (3, 3)
┌───────┬────────┬───────┐
│ first ┆ second ┆ third │
│ ---   ┆ ---    ┆ ---   │
│ u64   ┆ u64    ┆ u64   │
╞═══════╪════════╪═══════╡
│ 5     ┆ 15     ┆ 15    │
│ 5     ┆ 14     ┆ 6     │
│ 13    ┆ 18     ┆ 5     │
└───────┴────────┴───────┘


In [10]:
# Extract a single struct field by name.
first_only = df3.select(pl.col("numbers").struct.field("first"))
print(first_only)

shape: (3, 1)
┌───────┐
│ first │
│ ---   │
│ u64   │
╞═══════╡
│ 5     │
│ 5     │
│ 13    │
└───────┘


In [11]:
# The "*" wildcard selects every struct field at once.
all_fields = df3.select(pl.col("numbers").struct.field("*"))
print(all_fields)

shape: (3, 3)
┌───────┬────────┬───────┐
│ first ┆ second ┆ third │
│ ---   ┆ ---    ┆ ---   │
│ u64   ┆ u64    ┆ u64   │
╞═══════╪════════╪═══════╡
│ 5     ┆ 15     ┆ 15    │
│ 5     ┆ 14     ┆ 6     │
│ 13    ┆ 18     ┆ 5     │
└───────┴────────┴───────┘


In [12]:
# Stack the three struct fields into one long column, then count how often
# each value occurs (most frequent first).
numbers_struct = pl.col("numbers").struct
stacked_values = (
    numbers_struct.field("first")
    .append(numbers_struct.field("second"))
    .append(numbers_struct.field("third"))
)
print(df3.select(stacked_values.value_counts(sort=True).alias("counts")))

shape: (6, 1)
┌───────────┐
│ counts    │
│ ---       │
│ struct[2] │
╞═══════════╡
│ {5,3}     │
│ {15,2}    │
│ {13,1}    │
│ {14,1}    │
│ {18,1}    │
│ {6,1}     │
└───────────┘


In [13]:
# Three plain top-level columns, all typed as u64.
column_values = {
    "first": [5, 5, 13],
    "second": [15, 14, 18],
    "third": [15, 6, 5],
}
df4 = pl.DataFrame(
    data=column_values,
    schema={column: pl.UInt64 for column in column_values},
)
print(df4)

shape: (3, 3)
┌───────┬────────┬───────┐
│ first ┆ second ┆ third │
│ ---   ┆ ---    ┆ ---   │
│ u64   ┆ u64    ┆ u64   │
╞═══════╪════════╪═══════╡
│ 5     ┆ 15     ┆ 15    │
│ 5     ┆ 14     ┆ 6     │
│ 13    ┆ 18     ┆ 5     │
└───────┴────────┴───────┘


In [14]:
# Pack the three columns into a single struct column named "combined".
print(df4.with_columns(combined=pl.struct("first", "second", "third")))

shape: (3, 4)
┌───────┬────────┬───────┬───────────┐
│ first ┆ second ┆ third ┆ combined  │
│ ---   ┆ ---    ┆ ---   ┆ ---       │
│ u64   ┆ u64    ┆ u64   ┆ struct[3] │
╞═══════╪════════╪═══════╪═══════════╡
│ 5     ┆ 15     ┆ 15    ┆ {5,15,15} │
│ 5     ┆ 14     ┆ 6     ┆ {5,14,6}  │
│ 13    ┆ 18     ┆ 5     ┆ {13,18,5} │
└───────┴────────┴───────┴───────────┘


In [15]:
print(
    df4.with_columns(
        # Bundle the three columns into one struct so the Python callback
        # receives them together as a single Series. No intermediate alias
        # is needed: the final .alias("sum") determines the output name.
        pl.struct("first", "second", "third")
        .map_batches(
            # Add the three struct fields element-wise.
            lambda batch: batch.struct.field("first")
            + batch.struct.field("second")
            + batch.struct.field("third"),
            return_dtype=pl.UInt64,
        )
        .alias("sum")
    )
)

shape: (3, 4)
┌───────┬────────┬───────┬─────┐
│ first ┆ second ┆ third ┆ sum │
│ ---   ┆ ---    ┆ ---   ┆ --- │
│ u64   ┆ u64    ┆ u64   ┆ u64 │
╞═══════╪════════╪═══════╪═════╡
│ 5     ┆ 15     ┆ 15    ┆ 35  │
│ 5     ┆ 14     ┆ 6     ┆ 25  │
│ 13    ┆ 18     ┆ 5     ┆ 36  │
└───────┴────────┴───────┴─────┘


In [16]:
# Row-wise total of the three columns using the built-in horizontal sum.
print(df4.with_columns(sum=pl.sum_horizontal("first", "second", "third")))

shape: (3, 4)
┌───────┬────────┬───────┬─────┐
│ first ┆ second ┆ third ┆ sum │
│ ---   ┆ ---    ┆ ---   ┆ --- │
│ u64   ┆ u64    ┆ u64   ┆ u64 │
╞═══════╪════════╪═══════╪═════╡
│ 5     ┆ 15     ┆ 15    ┆ 35  │
│ 5     ┆ 14     ┆ 6     ┆ 25  │
│ 13    ┆ 18     ┆ 5     ┆ 36  │
└───────┴────────┴───────┴─────┘
