In [1]:
import json

import polars as pl

from spells import summon, ColName, ColType, ColSpec

## Fun with card attributes

This is a sample notebook demonstrating how to define custom card attributes and define functions of card attributes at the row level, which now has first-class support in Spells.

### Base View Columns can Depend on Card Attributes via Card Context
Suppose we want to find out how often drafters take the card with lowest mana value. We will need the mana value of the seen cards in `PACK_CARD` as well as the picked card `PICK`. We use function expression definitions with the `name` argument to achieve this in both cases.

I'm going to make a template function with some extra columns for reasons that will be apparent soon. I suggest you read through these one by one and try to parse the intent of the `expr` field for each one. It might look intimidating if you're not familiar with this kind of expression language, but the expressions mostly read like sentences. For example, the `seen_{attr}` column checks whether the `pack_card_{name}` column for a given name is positive and, if it is, takes the `attr` value from the `card_context`. The `col_type` field tells Spells how to interpret the expression. For example, `pick_{attr}_sum` is a `PICK_SUM` column, so Spells knows to apply the expression function to the result of the `pick` field, then sum over all base rows for each group.

In [2]:
def attr_cols(attr):
    """Build the template extension columns for a single card attribute.

    Returns a dict mapping column names to ``ColSpec`` objects, all derived
    from ``attr``:

    * ``seen_{attr}``: for each card name, the ``attr`` value looked up in
      ``card_context`` when ``pack_card_{name}`` is positive, null otherwise.
    * ``pick_{attr}_sum``: the ``attr`` value looked up in ``card_context``,
      declared as a ``PICK_SUM`` column.
    * ``least_{attr}_taken`` / ``greatest_{attr}_taken``: whether
      ``pick_{attr}_sum`` is <= the horizontal minimum / >= the horizontal
      maximum of the ``seen_{attr}_{name}`` columns.
    * ``least_{attr}_taken_rate``, ``greatest_{attr}_taken_rate`` and
      ``pick_{attr}_mean``: the corresponding column divided by ``num_taken``.
    """
    seen = f"seen_{attr}"
    pick_sum = f"pick_{attr}_sum"
    least_taken = f"least_{attr}_taken"
    greatest_taken = f"greatest_{attr}_taken"

    def seen_values(names):
        # One seen-value column per card name.
        return [pl.col(f"{seen}_{name}") for name in names]

    def per_taken(column):
        # AGG spec: the named column divided by the number of picks taken.
        return ColSpec(
            col_type=ColType.AGG, expr=pl.col(column) / pl.col("num_taken")
        )

    return {
        seen: ColSpec(
            col_type=ColType.NAME_SUM,
            expr=lambda name, card_context: (
                pl.when(pl.col(f"pack_card_{name}") > 0)
                .then(card_context[name][attr])
                .otherwise(None)
            ),
        ),
        pick_sum: ColSpec(
            col_type=ColType.PICK_SUM,
            expr=lambda name, card_context: card_context[name][attr],
        ),
        least_taken: ColSpec(
            col_type=ColType.PICK_SUM,
            expr=lambda names: (
                pl.col(pick_sum) <= pl.min_horizontal(seen_values(names))
            ),
        ),
        f"{least_taken}_rate": per_taken(least_taken),
        greatest_taken: ColSpec(
            col_type=ColType.PICK_SUM,
            expr=lambda names: (
                pl.col(pick_sum) >= pl.max_horizontal(seen_values(names))
            ),
        ),
        f"{greatest_taken}_rate": per_taken(greatest_taken),
        f"pick_{attr}_mean": per_taken(pick_sum),
    }

An extended version of the above function is importable in Spells using `from spells.extension import context_cols`, and columns to convert metrics to z-scores can be found in `stat_cols`

In [3]:
# Instantiate the template columns for the "mana_value" attribute.
ext = attr_cols("mana_value")

# Per cohort: number of picks and the rate at which the picked card had a
# mana value no greater than any card seen in the pack.
mana_value_by_cohort = summon(
    "DSK",
    columns=["num_taken", "least_mana_value_taken_rate"],
    group_by=["player_cohort"],
    extensions=ext,
)
print(mana_value_by_cohort)

shape: (4, 3)
┌───────────────┬───────────┬─────────────────────────────┐
│ player_cohort ┆ num_taken ┆ least_mana_value_taken_rate │
│ ---           ┆ ---       ┆ ---                         │
│ str           ┆ i32       ┆ f64                         │
╞═══════════════╪═══════════╪═════════════════════════════╡
│ Bottom        ┆ 1079884   ┆ 0.175012                    │
│ Middle        ┆ 2515295   ┆ 0.17853                     │
│ Other         ┆ 2184415   ┆ 0.174103                    │
│ Top           ┆ 1303151   ┆ 0.182119                    │
└───────────────┴───────────┴─────────────────────────────┘


As expected, the best players take cheap cards (slightly) more often!

### Feeding Metrics Back In

Next, we'll demonstrate how to feed calculated card metrics back in for use in custom columns, using the `card_context` parameter, which accepts a name-indexed dataframe. We'll calculate gp_wr, then use it to see the average gp_wr selected by cohort.

In [4]:
# First pass: compute gp_wr for FDN so it can be fed back in as card context.
gp_wr = summon("FDN", columns=[ColName.GP_WR])

# Rebuild the template columns around the "gp_wr" attribute.
ext = attr_cols("gp_wr")

# Second pass: mean gp_wr of the picks, per cohort, reading gp_wr from the
# frame computed above via card_context.
pick_gp_wr_by_cohort = summon(
    "FDN",
    columns=["pick_gp_wr_mean"],
    group_by=["player_cohort"],
    extensions=ext,
    card_context=gp_wr,
)
print(pick_gp_wr_by_cohort)

shape: (4, 2)
┌───────────────┬─────────────────┐
│ player_cohort ┆ pick_gp_wr_mean │
│ ---           ┆ ---             │
│ str           ┆ f64             │
╞═══════════════╪═════════════════╡
│ Bottom        ┆ 0.537761        │
│ Middle        ┆ 0.538843        │
│ Other         ┆ 0.537782        │
│ Top           ┆ 0.539812        │
└───────────────┴─────────────────┘


Using both paradigms, we can see how often players in each group took the highest-ranked card by gp_wr for early picks, following Sierkovitz (https://threadreaderapp.com/thread/1864654591545008434.html). Here I do p1p1 but you can just tweak the filter or add groups to see the full distributions.

In [5]:
# Keep only rows with pick_num <= 1 in pack 1 (the p1p1 case discussed above).
p1p1_filter = {"$and": [{"lhs": "pick_num", "op": "<=", "rhs": 1}, {"pack_num": 1}]}

# `ext` and `gp_wr` are the gp_wr columns and FDN card context from the previous cell.
fdn_p1p1 = summon(
    "FDN",
    columns=["num_drafts", "greatest_gp_wr_taken_rate", "picked_match_wr"],
    group_by=["expansion"],
    extensions=ext,
    filter_spec=p1p1_filter,
    card_context=gp_wr,
)
print(fdn_p1p1)

shape: (1, 4)
┌───────────┬────────────┬───────────────────────────┬─────────────────┐
│ expansion ┆ num_drafts ┆ greatest_gp_wr_taken_rate ┆ picked_match_wr │
│ ---       ┆ ---        ┆ ---                       ┆ ---             │
│ str       ┆ i32        ┆ f64                       ┆ f64             │
╞═══════════╪════════════╪═══════════════════════════╪═════════════════╡
│ FDN       ┆ 132430     ┆ 0.336306                  ┆ 0.546273        │
└───────────┴────────────┴───────────────────────────┴─────────────────┘


Curiously, the value is quite a bit higher for DSK. It's interesting to break out by cohort as well.

In [6]:
# Recompute the card context for DSK; `ext` still holds the gp_wr columns
# built in an earlier cell.
gp_wr = summon("DSK", columns=[ColName.GP_WR])

# Keep only rows with pick_num <= 1 in pack 1.
p1p1_filter = {"$and": [{"lhs": "pick_num", "op": "<=", "rhs": 1}, {"pack_num": 1}]}

dsk_p1p1_by_cohort = summon(
    "DSK",
    columns=["num_drafts", "greatest_gp_wr_taken_rate", "picked_match_wr"],
    group_by=["player_cohort"],
    extensions=ext,
    filter_spec=p1p1_filter,
    card_context=gp_wr,
)
print(dsk_p1p1_by_cohort)

shape: (4, 4)
┌───────────────┬────────────┬───────────────────────────┬─────────────────┐
│ player_cohort ┆ num_drafts ┆ greatest_gp_wr_taken_rate ┆ picked_match_wr │
│ ---           ┆ ---        ┆ ---                       ┆ ---             │
│ str           ┆ i32        ┆ f64                       ┆ f64             │
╞═══════════════╪════════════╪═══════════════════════════╪═════════════════╡
│ Bottom        ┆ 25883      ┆ 0.368582                  ┆ 0.464428        │
│ Middle        ┆ 60200      ┆ 0.414784                  ┆ 0.542797        │
│ Other         ┆ 52384      ┆ 0.369674                  ┆ 0.539835        │
│ Top           ┆ 31197      ┆ 0.436837                  ┆ 0.614636        │
└───────────────┴────────────┴───────────────────────────┴─────────────────┘


Let's do the same thing with DEq, my custom card metric. We'll use the base version for simplicity (i.e. without the usual bias and metagame adjustments).

In [7]:
# Base DEq expression (no bias or metagame adjustments), built from its pieces.
ata_term = (1 - (pl.col("ata") - 1) / 14).pow(2)
deq_base_expr = (pl.col("gp_wr_excess") + 0.03 * ata_term) * pl.col("pct_gp")

ext = {"deq_base": ColSpec(col_type=ColType.AGG, expr=deq_base_expr)}

# deq_base computed from the "Top" player cohort only; used as card context below.
deq = summon(
    "DSK",
    columns=["deq_base"],
    filter_spec={"player_cohort": "Top"},
    extensions=ext,
)

# Add the template columns for the "deq_base" attribute to the same dict.
ext.update(attr_cols("deq_base"))

# Keep only rows with pick_num <= 1 in pack 1.
p1p1_filter = {"$and": [{"lhs": "pick_num", "op": "<=", "rhs": 1}, {"pack_num": 1}]}

deq_p1p1_by_cohort = summon(
    "DSK",
    columns=["greatest_deq_base_taken_rate"],
    group_by=["player_cohort"],
    extensions=ext,
    filter_spec=p1p1_filter,
    card_context=deq,
)
print(deq_p1p1_by_cohort)

shape: (4, 2)
┌───────────────┬──────────────────────────────┐
│ player_cohort ┆ greatest_deq_base_taken_rate │
│ ---           ┆ ---                          │
│ str           ┆ f64                          │
╞═══════════════╪══════════════════════════════╡
│ Bottom        ┆ 0.521964                     │
│ Middle        ┆ 0.571495                     │
│ Other         ┆ 0.528444                     │
│ Top           ┆ 0.593935                     │
└───────────────┴──────────────────────────────┘


This is quite promising for the hypothesis that DEq models card quality, although there is still much work to do to understand the implications of using the top-player ATA.

### Custom Card Attributes in Card Context

Lastly, let's create a custom card attribute and use it to define custom columns as well. Let's check for the keyword "Flying". I don't have a built-in for it, but I provide the full json text which can be parsed as shown. Note that unlike base view columns, custom `CARD_ATTR` columns should not use `name` and `card_context`, but polars expressions instead. We'll calculate how often a player takes a card with flying when it is available.

Note that `"has_flying"` is available in card_context without passing it in as such. Since it is a provided `CARD_ATTR`, it is accessible on `card_context` automatically.

In [8]:
from spells.extension import context_cols

# Custom card attribute: True when the card's JSON lists the "Flying" keyword.
has_flying_expr = pl.col("card_json").map_elements(
    lambda text: "Flying" in json.loads(text).get("keywords", []),
    return_dtype=pl.Boolean,
)

# Largest has_flying value among the seen cards, one column per card name.
greatest_has_flying_seen_expr = lambda names: pl.max_horizontal(
    [pl.col(f"seen_has_flying_{name}") for name in names]
)

ext = {
    "has_flying": ColSpec(
        col_type=ColType.CARD_ATTR,
        expr=has_flying_expr,
        version="1",  # use alongside map_elements to use the local cache
    ),
    "greatest_has_flying_seen": ColSpec(
        col_type=ColType.PICK_SUM,
        expr=greatest_has_flying_seen_expr,
    ),
    "has_flying_taken_rate": ColSpec(
        col_type=ColType.AGG,
        expr=pl.col("pick_has_flying_sum") / pl.col("greatest_has_flying_seen"),
    ),
}

# Merge in the library-provided context columns for "has_flying".
ext.update(context_cols("has_flying", silent=True))

has_flying_by_cohort = summon(
    "DSK",
    columns=["has_flying_taken_rate"],
    group_by=["player_cohort"],
    extensions=ext,
)
print(has_flying_by_cohort)

shape: (4, 2)
┌───────────────┬───────────────────────┐
│ player_cohort ┆ has_flying_taken_rate │
│ ---           ┆ ---                   │
│ str           ┆ f64                   │
╞═══════════════╪═══════════════════════╡
│ Bottom        ┆ 0.166631              │
│ Middle        ┆ 0.155851              │
│ Other         ┆ 0.163145              │
│ Top           ┆ 0.148893              │
└───────────────┴───────────────────────┘


It seems top players know that birds aren't real. Let's check our work a bit. The rate should be `1.0` for flying creatures.

In [9]:
# Sanity check at the card level, using the `ext` columns from the previous cell.
flying_check = summon(
    "DSK",
    columns=["has_flying", "has_flying_taken_rate", "num_taken"],
    extensions=ext,
)

# Sorting on the boolean `has_flying` alone leaves the relative order of the
# many tied rows unspecified, so which ten cards are shown could vary between
# runs. Break ties on `name` to make the preview reproducible.
print(flying_check.sort(["has_flying", "name"], descending=[True, False]).head(10))

shape: (10, 4)
┌──────────────────────┬────────────┬───────────────────────┬───────────┐
│ name                 ┆ has_flying ┆ has_flying_taken_rate ┆ num_taken │
│ ---                  ┆ ---        ┆ ---                   ┆ ---       │
│ str                  ┆ bool       ┆ f64                   ┆ i32       │
╞══════════════════════╪════════════╪═══════════════════════╪═══════════╡
│ Abhorrent Oculus     ┆ true       ┆ 1.0                   ┆ 4700      │
│ Doomsday Excruciator ┆ true       ┆ 1.0                   ┆ 6058      │
│ Erratic Apparition   ┆ true       ┆ 1.0                   ┆ 43464     │
│ Fear of Abduction    ┆ true       ┆ 1.0                   ┆ 18970     │
│ Fear of Falling      ┆ true       ┆ 1.0                   ┆ 20719     │
│ Fear of Infinity     ┆ true       ┆ 1.0                   ┆ 18368     │
│ Fear of Isolation    ┆ true       ┆ 1.0                   ┆ 25767     │
│ Friendly Ghost       ┆ true       ┆ 1.0                   ┆ 45337     │
│ Ghostly Dancers      

### Multiple Sets at Once

We can even analyze multiple sets, and group by expansion to feed context back in. Let's repeat the GP WR analysis over multiple sets:

In [10]:
ext = attr_cols("gp_wr")
sets = ["DSK", "BLB", "FDN", "MH3", "OTJ", "MKM"]

# With several sets, group the card context by expansion as well as name.
gp_wr = summon(sets, columns=[ColName.GP_WR], group_by=["expansion", "name"])

# Keep only rows with pick_num <= 1 in pack 1.
p1p1_filter = {"$and": [{"lhs": "pick_num", "op": "<=", "rhs": 1}, {"pack_num": 1}]}

multi_set_p1p1 = summon(
    sets,
    columns=["num_drafts", "greatest_gp_wr_taken_rate", "picked_match_wr"],
    group_by=["player_cohort"],
    extensions=ext,
    filter_spec=p1p1_filter,
    card_context=gp_wr,
)
print(multi_set_p1p1)

shape: (4, 4)
┌───────────────┬────────────┬───────────────────────────┬─────────────────┐
│ player_cohort ┆ num_drafts ┆ greatest_gp_wr_taken_rate ┆ picked_match_wr │
│ ---           ┆ ---        ┆ ---                       ┆ ---             │
│ str           ┆ i32        ┆ f64                       ┆ f64             │
╞═══════════════╪════════════╪═══════════════════════════╪═════════════════╡
│ Bottom        ┆ 143513     ┆ 0.314543                  ┆ 0.463525        │
│ Middle        ┆ 313756     ┆ 0.351066                  ┆ 0.543233        │
│ Other         ┆ 311412     ┆ 0.314304                  ┆ 0.542417        │
│ Top           ┆ 175771     ┆ 0.376888                  ┆ 0.616455        │
└───────────────┴────────────┴───────────────────────────┴─────────────────┘


and DEq, because I'm curious and haven't seen it before.

In [None]:
# Base DEq expression (no bias or metagame adjustments), built from its pieces.
ata_term = (1 - (pl.col("ata") - 1) / 14).pow(2)
deq_base_expr = (pl.col("gp_wr_excess") + 0.03 * ata_term) * pl.col("pct_gp")

ext = {"deq_base": ColSpec(col_type=ColType.AGG, expr=deq_base_expr)}

# deq_base from the "Top" cohort, grouped by expansion and name across all of
# `sets` (the list defined in an earlier cell).
deq = summon(
    sets,
    columns=["deq_base"],
    group_by=["expansion", "name"],
    filter_spec={"player_cohort": "Top"},
    extensions=ext,
)

# Add the template columns for the "deq_base" attribute to the same dict.
ext.update(attr_cols("deq_base"))

# Keep only rows with pick_num <= 1 in pack 1.
p1p1_filter = {"$and": [{"lhs": "pick_num", "op": "<=", "rhs": 1}, {"pack_num": 1}]}

multi_set_deq_p1p1 = summon(
    sets,
    columns=["greatest_deq_base_taken_rate"],
    group_by=["player_cohort"],
    extensions=ext,
    filter_spec=p1p1_filter,
    card_context=deq,
)
print(multi_set_deq_p1p1)