In [1]:
import json

import polars as pl

from spells import summon
from spells.enums import ColName, ColType
from spells.columns import ColSpec

## Fun with card attributes

This is a sample notebook demonstrating how to define custom card attributes and define functions of card attributes at the row level, which now has first-class support in Spells.

Suppose we want to find out how often drafters take the card with lowest mana value. We will need the mana value of the seen cards in `PACK_CARD` as well as the picked card `PICK`. We use function expression definitions with the `name` argument to achieve this in both cases.

In [4]:
# Stand-in mana value for cards that are not in the pack. It is far above any
# value we expect to see, so it never wins the row-wise minimum taken below.
UNSEEN_MANA_VALUE = 1000.0

ext = [
    # Per card name: the card's mana value when the pack contains it,
    # otherwise the stand-in value.
    ColSpec(
        name="seen_mana_value",
        expr=lambda name, card_context: (
            pl.when(pl.col(f"pack_card_{name}") > 0)
            .then(card_context[name]["mana_value"])
            .otherwise(UNSEEN_MANA_VALUE)
        ),
        col_type=ColType.NAME_SUM,
    ),
    # Mana value of the picked card, looked up by name in the card context.
    ColSpec(
        name="picked_mana_value",
        expr=lambda name, card_context: card_context[name]["mana_value"],
        col_type=ColType.PICK_SUM,
    ),
    # Row-level flag: the picked card's mana value is no greater than the
    # smallest `seen_mana_value_*` column in the same row.
    ColSpec(
        name="picked_least_mana_value",
        expr=lambda names: (
            pl.col("picked_mana_value")
            <= pl.min_horizontal(
                [pl.col(f"seen_mana_value_{name}") for name in names]
            )
        ),
        col_type=ColType.PICK_SUM,
    ),
    # Aggregates: both sums are divided by the number of picks taken.
    ColSpec(
        name="picked_least_mana_value_rate",
        expr=pl.col("picked_least_mana_value") / pl.col("num_taken"),
        col_type=ColType.AGG,
    ),
    ColSpec(
        name="avg_mv_picked",
        expr=pl.col("picked_mana_value") / pl.col("num_taken"),
        col_type=ColType.AGG,
    ),
]


In [5]:
# DSK, grouped by player cohort: pick count, mean picked mana value, and the
# rate at which the picked card had the lowest mana value among the seen cards.
summon(
    "DSK",
    extensions=ext,
    group_by=["player_cohort"],
    columns=["num_taken", "avg_mv_picked", "picked_least_mana_value_rate"],
)

player_cohort,num_taken,avg_mv_picked,picked_least_mana_value_rate
str,i32,f64,f64
"""Bottom""",1079884,3.01064,0.175012
"""Middle""",2515295,2.98892,0.17853
"""Other""",2184415,3.007436,0.174103
"""Top""",1303151,2.969419,0.182119


As expected, the best players take cheap cards (slightly) more often!

Next, we'll demonstrate how to feed calculated card metrics back in for use in custom columns, using the `card_context` parameter, which accepts a name-indexed dataframe. We'll calculate gp_wr, then use it to see the average gp_wr selected by cohort.

In [22]:
# Compute the `gp_wr` metric for FDN; the result is passed back in below as
# the card context for the custom columns.
gp_wr = summon("FDN", columns=[ColName.GP_WR])

ext = [
    # `gp_wr` of the picked card, looked up by name in the card context.
    ColSpec(
        name="picked_gpwr",
        expr=lambda name, card_context: card_context[name]["gp_wr"],
        col_type=ColType.PICK_SUM,
    ),
    # Summed picked `gp_wr` divided by the number of picks taken.
    ColSpec(
        name="picked_gpwr_mean",
        expr=pl.col("picked_gpwr") / pl.col("num_taken"),
        col_type=ColType.AGG,
    ),
]

summon(
    "FDN",
    columns=["picked_gpwr_mean"],
    group_by=["player_cohort"],
    card_context=gp_wr,
    extensions=ext,
)

player_cohort,picked_gpwr_mean
str,f64
"""Bottom""",0.536653
"""Middle""",0.537659
"""Other""",0.536676
"""Top""",0.538845


Using both paradigms, we can see how often players in each group took the highest-ranked card by gp_wr for early picks. Here I do p1p1 but you can just tweak the filter or add groups to see the full distributions.

In [23]:
# Columns answering "did the drafter take the highest-gp_wr card in the pack?".
# They build on the `picked_gpwr` column that `ext` already contains.
highest_gpwr_specs = [
    # Per card name: the card's gp_wr when the pack contains it, else 0.0 so
    # that absent cards do not win the row-wise maximum (gp_wr is presumably
    # always positive).
    ColSpec(
        name="seen_gpwr",
        expr=lambda name, card_context: (
            pl.when(pl.col(f"pack_card_{name}") > 0)
            .then(card_context[name]["gp_wr"])
            .otherwise(0.0)
        ),
        col_type=ColType.NAME_SUM,
    ),
    # Row-level flag: the picked card's gp_wr is at least the largest
    # `seen_gpwr_*` column in the same row.
    ColSpec(
        name="picked_highest_gpwr",
        expr=lambda names: (
            pl.col("picked_gpwr")
            >= pl.max_horizontal(
                [pl.col(f"seen_gpwr_{name}") for name in names]
            )
        ),
        col_type=ColType.PICK_SUM,
    ),
    # Share of picks on which the flag above was set.
    ColSpec(
        name="picked_highest_gpwr_rate",
        expr=pl.col("picked_highest_gpwr") / pl.col("num_taken"),
        col_type=ColType.AGG,
    ),
]

# Register each spec at most once. A bare `ext.extend(...)` appends another
# copy of every spec each time this cell is re-run, because `ext` is shared
# with the cells around it. Relies on ColSpec exposing the `name` it was
# constructed with.
registered_names = {spec.name for spec in ext}
ext.extend(
    spec for spec in highest_gpwr_specs if spec.name not in registered_names
)

# Restrict to pack 1, pick number <= 1.
summon(
    "FDN",
    columns=[
        "num_drafts",
        "picked_highest_gpwr_rate",
        "picked_match_wr",
    ],
    group_by=["expansion"],
    extensions=ext,
    filter_spec={
        "$and": [
            {"lhs": "pick_num", "op": "<=", "rhs": 1},
            {"pack_num": 1},
        ]
    },
    card_context=gp_wr,
)

expansion,num_drafts,picked_highest_gpwr_rate,picked_match_wr
str,i32,f64,f64
"""FDN""",104426,0.296736,0.544881


Curiously, the value is quite a bit higher for DSK. It is also interesting to break the result out by cohort.

In [25]:
# Use a DSK-specific name instead of rebinding `gp_wr`, which holds the FDN
# result. Overwriting it would make a later re-run of the FDN cell silently
# pair FDN drafts with DSK win rates.
gp_wr_dsk = summon("DSK", columns=[ColName.GP_WR])

# Same query as for FDN (pack 1, pick number <= 1), broken out by cohort.
summon(
    "DSK",
    columns=[
        "num_drafts",
        "picked_highest_gpwr_rate",
        "picked_match_wr",
    ],
    group_by=["player_cohort"],
    extensions=ext,
    filter_spec={
        "$and": [
            {"lhs": "pick_num", "op": "<=", "rhs": 1},
            {"pack_num": 1},
        ]
    },
    card_context=gp_wr_dsk,
)

player_cohort,num_drafts,picked_highest_gpwr_rate,picked_match_wr
str,i32,f64,f64
"""Bottom""",25883,0.368582,0.464428
"""Middle""",60200,0.414784,0.542797
"""Other""",52384,0.369674,0.539835
"""Top""",31197,0.436837,0.614636


Finally, let's do the same thing with DEq, my custom card metric. We'll use the base version for simplicity (i.e. without the usual bias and metagame adjustments).

In [33]:
# Base DEq: (gp_wr_excess + 0.03 * (1 - ata / 14) ** 2) * pct_gp.
# NOTE(review): the constants 0.03 and 14 are taken as-is from the formula;
# their meaning is not documented in this notebook.
ext = [
    ColSpec(
        name="deq_base",
        expr=(
            (pl.col("gp_wr_excess") + 0.03 * (1 - pl.col("ata") / 14).pow(2))
            * pl.col("pct_gp")
        ),
        col_type=ColType.AGG,
    ),
]

# Card-level DEq computed from the "Top" player cohort only; used below as the
# card context.
deq = summon(
    "DSK",
    columns=["deq_base"],
    extensions=ext,
    filter_spec={"player_cohort": "Top"},
)

# `ext` is rebuilt at the top of this cell, so the in-place append below gives
# the same list on every run of the cell.
ext += [
    # DEq of the picked card, looked up by name in the card context.
    ColSpec(
        name="picked_deq_base",
        expr=lambda name, card_context: card_context[name]["deq_base"],
        col_type=ColType.PICK_SUM,
    ),
    # Per card name: the card's DEq when the pack contains it, else 0.0.
    ColSpec(
        name="seen_deq_base",
        expr=lambda name, card_context: (
            pl.when(pl.col(f"pack_card_{name}") > 0)
            .then(card_context[name]["deq_base"])
            .otherwise(0.0)
        ),
        col_type=ColType.NAME_SUM,
    ),
    # Row-level flag: the picked card's DEq is at least the largest
    # `seen_deq_base_*` column in the same row.
    ColSpec(
        name="picked_highest_deq_base",
        expr=lambda names: (
            pl.col("picked_deq_base")
            >= pl.max_horizontal(
                [pl.col(f"seen_deq_base_{name}") for name in names]
            )
        ),
        col_type=ColType.PICK_SUM,
    ),
    # Share of picks on which the flag above was set.
    ColSpec(
        name="picked_highest_deq_base_rate",
        expr=pl.col("picked_highest_deq_base") / pl.col("num_taken"),
        col_type=ColType.AGG,
    ),
]

# Pack 1, pick number <= 1, grouped by player cohort.
summon(
    "DSK",
    columns=["picked_highest_deq_base_rate"],
    group_by=["player_cohort"],
    card_context=deq,
    extensions=ext,
    filter_spec={
        "$and": [
            {"lhs": "pick_num", "op": "<=", "rhs": 1},
            {"pack_num": 1},
        ]
    },
)


player_cohort,picked_highest_deq_base_rate
str,f64
"""Bottom""",0.516439
"""Middle""",0.565515
"""Other""",0.522125
"""Top""",0.587428


This is quite promising for the hypothesis that DEq models card quality, although there is still much work to tease out the implication of the use of the top player ATA.