In [1]:
import json

import polars as pl

from spells import summon, card_df
from spells.enums import ColName, ColType
from spells.columns import ColumnSpec

## Fun with card attributes

This is a sample notebook demonstrating how to define custom card attributes and define functions of card attributes at the row level. 
The normal paradigm is that card attributes are joined to the aggregated data set, which allows for tagging card-based aggregations by card attributes as well as linear aggregations by card attribute like averaging win rates.

However, you might want to do something nonlinear, and the usual custom column definition won't let you depend on a `CARD_ATTR` column when defining a `NAME_SUM` or `PICK_SUM` column. So we will expose some of our seams to do something a little more custom.

First, a little example of using the json dump to define an arbitrary extension of the raw data. Here we will be unoriginal and grab the mana value, which is already available under `MANA_VALUE`.

In [2]:
# Decode the raw card JSON with a plain Python callable and read "manaValue".
mana_value_from_json = pl.col(ColName.CARD_JSON).map_elements(
    lambda card_json: json.loads(card_json)["manaValue"],
    return_dtype=pl.Float64,
)

# Extension list passed to `card_df` and `summon`; later cells add more specs.
ext = [
    ColumnSpec(
        name="mana_value_2",
        col_type=ColType.CARD_ATTR,
        expr=mana_value_from_json,
        dependencies=[ColName.CARD_JSON],
        # The expression wraps a non-serializable Python function, so this
        # explicit version string is what the cache signature uses.
        version="1",
    ),
]

I exposed a function, `card_df`, just for this example. It lets us check the new column against the built-in `MANA_VALUE`.

In [3]:
# Build the card data frame for DSK with the extension column attached.
df = card_df("DSK", ext)

# Show the built-in mana value next to the JSON-derived one for comparison.
df.select(ColName.NAME, ColName.MANA_VALUE, "mana_value_2")

name,mana_value,mana_value_2
str,f64,f64
"""Abandoned Campground""",0.0,0.0
"""Abhorrent Oculus""",3.0,3.0
"""Acrobatic Cheerleader""",2.0,2.0
"""Altanak, the Thrice-Called""",7.0,7.0
"""Anthropede""",4.0,4.0
…,…,…
"""Wildfire Wickerfolk""",2.0,2.0
"""Winter's Intervention""",2.0,2.0
"""Winter, Misanthropic Guide""",4.0,4.0
"""Withering Torment""",3.0,3.0


In [4]:
# Card name -> mana value lookup, read by the name-mapped columns defined below.
dicts = df.select([ColName.NAME, ColName.MANA_VALUE]).to_dicts()
mv_map = dict((row[ColName.NAME], row[ColName.MANA_VALUE]) for row in dicts)

Now, suppose we want to find out how often drafters take the card with the lowest mana value. We will need the mana value of the seen cards in `PACK_CARD` as well as the picked card `PICK`. The name-mapped columns are easier because we explicitly define them as a function of name. As always, be careful about closures on global variables: the value is bound to the expression when it is hydrated with the name values, which happens each time `summon` is called.

In [5]:
def _seen_mana_value(name):
    """Mana value of card `name` where it is in the pack, else a large sentinel.

    Reads the global `mv_map` at call time, i.e. whenever the spec is
    hydrated with card names.
    """
    in_pack = pl.col(f"pack_card_{name}") > 0
    # we're going to take the min over these columns, so cards that are not
    # in the pack get a large placeholder value
    return pl.when(in_pack).then(mv_map[name]).otherwise(1000.0)


ext.append(
    ColumnSpec(
        name="seen_mana_value",
        col_type=ColType.NAME_SUM,
        exprMap=_seen_mana_value,
        dependencies=[ColName.PACK_CARD],
    )
)

The `PICK` value is trickier, since we would like to fully specify the formula. We could use `pl.Expr.map_elements` as above, but for large aggregations we're better off defining a proper Polars expression. Let's use the `NAME_SUM` paradigm as above to do that. (I did say we were going to expose some seams)

In [6]:
def _is_picked(name):
    """Boolean expression: the pick on this row is the card `name`."""
    return pl.col(ColName.PICK) == name


def _picked_mana_value(name):
    """Mana value of card `name` where it was the pick, else the 1000.0 sentinel."""
    was_picked = pl.col(f"is_picked_{name}")
    return pl.when(was_picked).then(mv_map[name]).otherwise(1000.0)


# a dirty trick, careful with unintended matches: each regex selects every
# column whose name starts with the given prefix
lowest_picked = pl.min_horizontal(pl.col("^picked_mana_value_.*$"))
lowest_seen = pl.min_horizontal(pl.col("^seen_mana_value_.*$"))

ext.extend(
    [
        # Per-card flag marking which card was picked.
        ColumnSpec(
            name="is_picked",
            col_type=ColType.NAME_SUM,
            exprMap=_is_picked,
            dependencies=[ColName.PICK],
        ),
        # Per-card mana value, populated only for the picked card.
        ColumnSpec(
            name="picked_mana_value",
            col_type=ColType.NAME_SUM,
            exprMap=_picked_mana_value,
            dependencies=["is_picked"],
        ),
        # True when the picked card's mana value is no higher than the
        # lowest mana value among the seen cards.
        ColumnSpec(
            name="picked_least_mana_value",
            col_type=ColType.PICK_SUM,
            expr=lowest_picked <= lowest_seen,
            dependencies=["picked_mana_value", "seen_mana_value"],
        ),
        # Share of picks that were a lowest-mana-value card.
        ColumnSpec(
            name="picked_least_mana_value_rate",
            col_type=ColType.AGG,
            expr=pl.col("picked_least_mana_value") / pl.col("num_taken"),
            dependencies=["picked_least_mana_value", "num_taken"],
        ),
    ]
)

While we're at it, let's grab the average mana value of picked cards. This is a straightforward sum-based aggregation, so we can use the experimental `CARD_SUM` column type, along with the above `PICK_SUM` approach for comparison.

In [7]:
ext.extend(
    [
        # --- CARD_SUM route: weight each card's mana value by its pick count.
        ColumnSpec(
            name="picked_mv_card_sum",
            col_type=ColType.CARD_SUM,
            expr=pl.col("mana_value") * pl.col("num_taken"),
            dependencies=["mana_value", "num_taken"],
        ),
        ColumnSpec(
            name="avg_mv_picked_1",
            col_type=ColType.AGG,
            expr=pl.col("picked_mv_card_sum") / pl.col("num_taken"),
            dependencies=["picked_mv_card_sum", "num_taken"],
        ),
        # --- PICK_SUM route: recover the picked card's mana value per row.
        # Every `picked_mana_value_<card>` column holds the 1000.0 sentinel
        # except the one for the picked card, so the horizontal min is the
        # picked card's mana value.
        #
        # The column is deliberately NOT named `picked_mana_value_*`: that
        # prefix is matched by the `^picked_mana_value_.*$` regex, which is
        # meant to select only the per-card columns. Using the same
        # `picked_mv_*` prefix as the CARD_SUM column above keeps this
        # aggregate out of the regex's reach.
        ColumnSpec(
            name="picked_mv_pick_sum",
            col_type=ColType.PICK_SUM,
            expr=pl.min_horizontal(pl.col("^picked_mana_value_.*$")),
            dependencies=["picked_mana_value"],
        ),
        ColumnSpec(
            name="avg_mv_picked_2",
            col_type=ColType.AGG,
            expr=pl.col("picked_mv_pick_sum") / pl.col("num_taken"),
            dependencies=["picked_mv_pick_sum", "num_taken"],
        ),
    ]
)

In [9]:
# Aggregate the custom metrics for DSK, one row per player cohort.
summon(
    "DSK",
    extensions=ext,  # every ColumnSpec registered in the cells above
    group_by=["player_cohort"],
    columns=[
        "num_taken",
        "avg_mv_picked_1",  # CARD_SUM-based average
        "avg_mv_picked_2",  # PICK_SUM-based average, should agree with the first
        "picked_least_mana_value_rate",
    ],
)

player_cohort,num_taken,avg_mv_picked_1,avg_mv_picked_2,picked_least_mana_value_rate
str,i32,f64,f64,f64
"""Bottom""",1079884,3.01064,3.01064,0.175012
"""Middle""",2515295,2.98892,2.98892,0.17853
"""Other""",2184415,3.007436,3.007436,0.174103
"""Top""",1303151,2.969419,2.969419,0.182119


As expected, the best players take cheap cards (slightly) more often!