In [1]:
import polars as pl
from polars import selectors as cs

# Demo frame used by every cell below: two string key columns (col1, col2)
# and two integer value columns (col3, col4), written one record per line so
# the literal reads like the printed table.
df = pl.DataFrame(
    [
        ("A", "C", 1, 11),
        ("A", "D", 2, 12),
        ("A", "C", 3, 13),
        ("B", "C", 4, 14),
        ("B", "D", 5, 15),
        ("B", "C", 6, 16),
    ],
    schema=["col1", "col2", "col3", "col4"],
    orient="row",
)
print(df)

shape: (6, 4)
┌──────┬──────┬──────┬──────┐
│ col1 ┆ col2 ┆ col3 ┆ col4 │
│ ---  ┆ ---  ┆ ---  ┆ ---  │
│ str  ┆ str  ┆ i64  ┆ i64  │
╞══════╪══════╪══════╪══════╡
│ A    ┆ C    ┆ 1    ┆ 11   │
│ A    ┆ D    ┆ 2    ┆ 12   │
│ A    ┆ C    ┆ 3    ┆ 13   │
│ B    ┆ C    ┆ 4    ┆ 14   │
│ B    ┆ D    ┆ 5    ┆ 15   │
│ B    ┆ C    ┆ 6    ┆ 16   │
└──────┴──────┴──────┴──────┘


## df.pivot()

In [2]:
# NOTE(review): left disabled — presumably because no aggregate_function is
# given while (col1, col2) pairs such as ("A", "C") occur more than once in df,
# which pivot cannot resolve without aggregating. Confirm, then either delete
# this cell or keep it as a documented counter-example.
# df.pivot("col2", index="col1", values="col3")

In [3]:
# Long -> wide: one row per col1, one column per distinct col2 value; every
# cell holds the largest col3 found for that (col1, col2) pair.
print(
    df.pivot(
        index="col1",
        on="col2",
        values="col3",
        aggregate_function="max",
    )
)

shape: (2, 3)
┌──────┬─────┬─────┐
│ col1 ┆ C   ┆ D   │
│ ---  ┆ --- ┆ --- │
│ str  ┆ i64 ┆ i64 │
╞══════╪═════╪═════╡
│ A    ┆ 3   ┆ 2   │
│ B    ┆ 6   ┆ 5   │
└──────┴─────┴─────┘


In [4]:
# The same pivot, with the aggregation spelled as an expression over the
# elements of each group instead of the "max" string shorthand.
# The result is kept as df2 because the unpivot cells further down consume it.
df2 = df.pivot(
    index="col1",
    on="col2",
    values="col3",
    aggregate_function=pl.element().max(),
)
print(df2)

shape: (2, 3)
┌──────┬─────┬─────┐
│ col1 ┆ C   ┆ D   │
│ ---  ┆ --- ┆ --- │
│ str  ┆ i64 ┆ i64 │
╞══════╪═════╪═════╡
│ A    ┆ 3   ┆ 2   │
│ B    ┆ 6   ┆ 5   │
└──────┴─────┴─────┘


In [5]:
# Index chosen through a selector (two key columns) and pivoted on col4.
# Every col4 value occurs exactly once in df, so each output column has a
# single non-null entry and the remaining cells are null.
print(
    df.pivot(
        index=cs.by_name("col1", "col2"),
        on="col4",
        values="col3",
        aggregate_function="max",
    )
)

shape: (4, 8)
┌──────┬──────┬──────┬──────┬──────┬──────┬──────┬──────┐
│ col1 ┆ col2 ┆ 11   ┆ 12   ┆ 13   ┆ 14   ┆ 15   ┆ 16   │
│ ---  ┆ ---  ┆ ---  ┆ ---  ┆ ---  ┆ ---  ┆ ---  ┆ ---  │
│ str  ┆ str  ┆ i64  ┆ i64  ┆ i64  ┆ i64  ┆ i64  ┆ i64  │
╞══════╪══════╪══════╪══════╪══════╪══════╪══════╪══════╡
│ A    ┆ C    ┆ 1    ┆ null ┆ 3    ┆ null ┆ null ┆ null │
│ A    ┆ D    ┆ null ┆ 2    ┆ null ┆ null ┆ null ┆ null │
│ B    ┆ C    ┆ null ┆ null ┆ null ┆ 4    ┆ null ┆ 6    │
│ B    ┆ D    ┆ null ┆ null ┆ null ┆ null ┆ 5    ┆ null │
└──────┴──────┴──────┴──────┴──────┴──────┴──────┴──────┘


In [6]:
# NOTE(review): disabled experiment — pivots on two columns (col2 and col4) at
# once, with all three arguments given as selectors. The reason it was
# commented out is not recorded here; restore it with an explanation or
# delete the cell.
# print(
#     df.pivot(
#         on=cs.by_name("col2", "col4"),
#         index=cs.by_name("col1"),
#         values=cs.by_name("col3"),
#         aggregate_function="max",
#     )
# )

In [7]:
# NOTE(review): disabled experiment — pivots two value columns (col2, a string
# column, and col3) on col4. The reason it was commented out is not recorded
# here; restore it with an explanation or delete the cell.
# print(
#     df.pivot(
#         on=cs.by_name("col4"),
#         index=cs.by_name("col1"),
#         values=cs.by_name("col2", "col3"),
#         aggregate_function="max",
#     )
# )

## df.unpivot()

In [8]:
# Wide -> long: fold the C and D columns of df2 back into a name column (col2)
# and a value column (col3), keeping col1 as the identifier.
# Sorted by value so the printed rows have a fixed order.
print(
    df2.unpivot(
        index="col1",
        on=["C", "D"],
        variable_name="col2",
        value_name="col3",
    ).sort(by="col3")
)

shape: (4, 3)
┌──────┬──────┬──────┐
│ col1 ┆ col2 ┆ col3 │
│ ---  ┆ ---  ┆ ---  │
│ str  ┆ str  ┆ i64  │
╞══════╪══════╪══════╡
│ A    ┆ D    ┆ 2    │
│ A    ┆ C    ┆ 3    │
│ B    ┆ D    ┆ 5    │
│ B    ┆ C    ┆ 6    │
└──────┴──────┴──────┘


In [9]:
# Same unpivot, but the value columns are picked by dtype instead of by name:
# in df2 the numeric (i64) columns are C and D, while col1 is a string column.
print(
    df2.unpivot(
        index="col1",
        on=cs.numeric(),
        variable_name="col2",
        value_name="col3",
    ).sort(by="col3")
)

shape: (4, 3)
┌──────┬──────┬──────┐
│ col1 ┆ col2 ┆ col3 │
│ ---  ┆ ---  ┆ ---  │
│ str  ┆ str  ┆ i64  │
╞══════╪══════╪══════╡
│ A    ┆ D    ┆ 2    │
│ A    ┆ C    ┆ 3    │
│ B    ┆ D    ┆ 5    │
│ B    ┆ C    ┆ 6    │
└──────┴──────┴──────┘


## df.unstack()

In [10]:
# Split the 6 values of col1 into consecutive runs of 3 and place the runs
# side by side: each output column (col1_0, col1_1) is one run.
print(
    df.unstack(
        columns="col1",
        step=3,
        how="vertical",
    )
)

shape: (3, 2)
┌────────┬────────┐
│ col1_0 ┆ col1_1 │
│ ---    ┆ ---    │
│ str    ┆ str    │
╞════════╪════════╡
│ A      ┆ B      │
│ A      ┆ B      │
│ A      ┆ B      │
└────────┴────────┘


In [11]:
# Same step, horizontal layout: the values are written row by row, so each
# output row is one run of 3 and the result is 2 rows by 3 columns.
print(
    df.unstack(
        columns="col1",
        step=3,
        how="horizontal",
    )
)

shape: (2, 3)
┌────────┬────────┬────────┐
│ col1_0 ┆ col1_1 ┆ col1_2 │
│ ---    ┆ ---    ┆ ---    │
│ str    ┆ str    ┆ str    │
╞════════╪════════╪════════╡
│ A      ┆ A      ┆ A      │
│ B      ┆ B      ┆ B      │
└────────┴────────┴────────┘


In [12]:
# A step that does not divide the 6 rows evenly: the second column only
# receives the 2 leftover values and its remaining slots come out as null.
print(
    df.unstack(
        columns="col1",
        step=4,
        how="vertical",
    )
)

shape: (4, 2)
┌────────┬────────┐
│ col1_0 ┆ col1_1 │
│ ---    ┆ ---    │
│ str    ┆ str    │
╞════════╪════════╡
│ A      ┆ B      │
│ A      ┆ B      │
│ A      ┆ null   │
│ B      ┆ null   │
└────────┴────────┘


In [13]:
# A 4-row split of the 6 values leaves two slots without a source row;
# fill_values supplies the string placed in those slots instead of null.
print(
    df.unstack(
        columns="col1",
        step=4,
        how="vertical",
        fill_values="fill_value",
    )
)

shape: (4, 2)
┌────────┬────────────┐
│ col1_0 ┆ col1_1     │
│ ---    ┆ ---        │
│ str    ┆ str        │
╞════════╪════════════╡
│ A      ┆ B          │
│ A      ┆ B          │
│ A      ┆ fill_value │
│ B      ┆ fill_value │
└────────┴────────────┘


## Remarks

### Remark 1: `pivot` expressed with `group_by` + `agg`

In [14]:
# Hand-written equivalent of the max-pivot: group on col1 and, for each target
# column, take the max of the col3 values whose col2 matches that column name.
# maintain_order=True keeps the groups in order of first appearance (A, then B).
print(
    df.group_by("col1", maintain_order=True).agg(
        pl.col("col3").filter(pl.col("col2") == "C").max().alias("C"),
        pl.col("col3").filter(pl.col("col2") == "D").max().alias("D"),
    )
)

shape: (2, 3)
┌──────┬─────┬─────┐
│ col1 ┆ C   ┆ D   │
│ ---  ┆ --- ┆ --- │
│ str  ┆ i64 ┆ i64 │
╞══════╪═════╪═════╡
│ A    ┆ 3   ┆ 2   │
│ B    ┆ 6   ┆ 5   │
└──────┴─────┴─────┘


In [15]:
from polars.testing import assert_frame_equal

# Generic version of the hand-written group_by/agg pivot: rebuild df2 (col3
# pivoted on col2, indexed by col1, aggregated with max) without calling pivot.
on, index, values = pl.col("col2"), pl.col("col1"), pl.col("col3")

# pivot() lays out its new columns in order of first appearance (sort_columns
# defaults to False), so the distinct `on` values are collected in that same
# order rather than sorted. assert_frame_equal also compares column order, so
# a sorted list would only match when the data happens to be alphabetical.
unique_column_values = (
    df.select(on.unique(maintain_order=True)).to_series().to_list()
)


def agg_func(col: pl.Expr) -> pl.Expr:
    """Aggregate the values that land in one pivoted cell (here: their max)."""
    return col.max()


# One aggregation per output column. The alias is applied after aggregating so
# the column name does not depend on what agg_func does to the expression name.
df3 = df.group_by(index, maintain_order=True).agg(
    [
        agg_func(values.filter(on.eq(value))).alias(value)
        for value in unique_column_values
    ]
)

assert_frame_equal(df2, df3)

### Remark 2: `unpivot` expressed with `concat_list` + `explode`

In [16]:
# Hand-written equivalent of df2.unpivot(index="col1", ...): pair every row
# with the list of value-column names and the list of its values, then explode
# both lists together.
index = "col1"

# Value columns = every column of df2 except the index column.
cols = [name for name in df2.columns if name != index]

print(
    df2.select(
        "col1",
        # One copy of the name list per ROW of df2, so it lines up with col1.
        # The repeat count is the frame height, not len(cols): the two are
        # equal here only because df2 is 2 rows by 2 value columns.
        pl.repeat(cols, df2.height).alias("col2"),
        pl.concat_list(pl.col("C", "D")).alias("col3"),
    )
    .explode("col2", "col3")
    .sort("col3")
)

shape: (4, 3)
┌──────┬──────┬──────┐
│ col1 ┆ col2 ┆ col3 │
│ ---  ┆ ---  ┆ ---  │
│ str  ┆ str  ┆ i64  │
╞══════╪══════╪══════╡
│ A    ┆ D    ┆ 2    │
│ A    ┆ C    ┆ 3    │
│ B    ┆ D    ┆ 5    │
│ B    ┆ C    ┆ 6    │
└──────┴──────┴──────┘


In [17]:
# Parameterised version of the explode-based unpivot: only index_cols,
# variable_name and value_name need to change to reuse it on another frame.
index_cols = ["col1"]

# Columns to fold into (variable, value) pairs: everything that is not an index.
on = [name for name in df2.columns if name not in index_cols]

variable_name, value_name = "col2", "col3"

print(
    df2.select(
        # Driven by index_cols rather than a hard-coded "col1", so the
        # parameter above actually controls the output.
        *index_cols,
        # One copy of the name list per ROW of df2 (the frame height), which
        # is what select needs to align it with the index columns. len(on)
        # equals the height here only by coincidence.
        pl.repeat(on, df2.height).alias(variable_name),
        pl.concat_list(pl.col(on)).alias(value_name),
    )
    .explode(variable_name, value_name)
    .sort(value_name)
)

shape: (4, 3)
┌──────┬──────┬──────┐
│ col1 ┆ col2 ┆ col3 │
│ ---  ┆ ---  ┆ ---  │
│ str  ┆ str  ┆ i64  │
╞══════╪══════╪══════╡
│ A    ┆ D    ┆ 2    │
│ A    ┆ C    ┆ 3    │
│ B    ┆ D    ┆ 5    │
│ B    ┆ C    ┆ 6    │
└──────┴──────┴──────┘
