# Summary

Wrangle the all card data.  Joins set data as well.  Then selects Standard Legal cards and other 
interesting data sets.

In [99]:
# Setup Notebook
import os
if os.path.basename(os.getcwd()) != "mtg-modeling":
    get_ipython().run_line_magic("run", '-i "../../scripts/notebook_header.py"')  # type: ignore

In [100]:
import pandas as pd
import polars as pl

In [101]:
SET_CODE = "BLB"

paths = {
    "raw": Path("data/raw/mtgjson/AllPrintingsParquetFiles"),
    "interim": Path("data/interim/mtgjson/AllPrintings"),
    "processed": Path("data/processed/mtgjson/AllPrintings"),
}

paths['raw'].exists()
os.makedirs(paths["interim"], exist_ok=True)
os.makedirs(paths["processed"], exist_ok=True)

In [102]:
cards = pl.scan_parquet(paths["raw"] / "cards.parquet")
legalities = pl.scan_parquet(paths["raw"] / "cardLegalities.parquet")
purchase_urls = pl.scan_parquet(paths["raw"] / "cardPurchaseUrls.parquet")
sets = pl.scan_parquet(paths["raw"] / "sets.parquet")

In [103]:
cards.collect().columns

['artist',
 'artistIds',
 'asciiName',
 'attractionLights',
 'availability',
 'boosterTypes',
 'borderColor',
 'cardParts',
 'colorIdentity',
 'colorIndicator',
 'colors',
 'defense',
 'duelDeck',
 'edhrecRank',
 'edhrecSaltiness',
 'faceConvertedManaCost',
 'faceFlavorName',
 'faceManaValue',
 'faceName',
 'finishes',
 'flavorName',
 'flavorText',
 'frameEffects',
 'frameVersion',
 'hand',
 'hasAlternativeDeckLimit',
 'hasFoil',
 'hasNonFoil',
 'isAlternative',
 'isFullArt',
 'isFunny',
 'isOnlineOnly',
 'isOversized',
 'isPromo',
 'isRebalanced',
 'isReprint',
 'isReserved',
 'isStarter',
 'isStorySpotlight',
 'isTextless',
 'isTimeshifted',
 'keywords',
 'language',
 'layout',
 'leadershipSkills',
 'life',
 'loyalty',
 'manaCost',
 'manaValue',
 'name',
 'number',
 'originalPrintings',
 'originalReleaseDate',
 'originalText',
 'originalType',
 'otherFaceIds',
 'power',
 'printings',
 'promoTypes',
 'rarity',
 'rebalancedPrintings',
 'relatedCards',
 'securityStamp',
 'setCode',
 'si

In [104]:
results = (
    cards
    .join(sets, left_on='setCode', right_on='code', how='left')
    .join(legalities, on='uuid', how='left')
    .join(purchase_urls, on='uuid', how='left')
)

wide_cards = results.collect()
wide_cards.write_parquet(paths["interim"] / "wide_cards.parquet")
print(wide_cards.shape)

(96216, 127)


## Data for Standard Legal Cards

Used for analysis of the Standard format.  

See the [MTG Wiki Standard/Timeline](https://mtg.fandom.com/wiki/Standard/Timeline) page for validate correct card composition.

In [105]:
standard_results = (
    results
    .filter(pl.col('standard') == 'Legal')
    .filter(pl.col('borderColor') == 'black')
    .filter(pl.col('isPromo').is_null())
    # .filter(pl.col('isReprint').is_null())
    .filter(pl.col('promoTypes').is_null())
    .sort(pl.col('name'), descending=False)
    .sort(pl.col('releaseDate'), descending=False)
    .sort(pl.col('power'), descending=False)
)

standard_cards = standard_results.collect()
standard_cards.write_parquet(paths["processed"] / "standard_cards.parquet")
print(standard_cards.shape)

(6344, 127)


## Outlaws of Thunder Junction Standard Legal Set

Per the [wiki](https://mtg.fandom.com/wiki/Standard/Timeline), there should be 271 cards in the OTJ set.

In [106]:
otj_results = (
    results
    .filter(pl.col('standard') == 'Legal')
    .filter(pl.col('borderColor') == 'black')
    .filter(pl.col('isPromo').is_null())
    # .filter(pl.col('isReprint').is_null())
    .filter(pl.col('promoTypes').is_null())
    .filter(pl.col('setCode') == SET_CODE)
    .sort(pl.col('number'))
)

otj_cards = otj_results.collect()
otj_cards.write_parquet(paths["processed"] / f"{SET_CODE}_std_cards.parquet")
otj_cards_ids = (
    otj_cards.select([pl.col("name"), pl.col("number").str.zfill(3)])
    .sort(pl.col("number"))
    .to_pandas()
)

In [107]:
otj_core_results = (
    otj_results
    .select([
        pl.col('name'),
        pl.col('setCode'),
        pl.col('releaseDate'),
        pl.col('number').str.zfill(3),
        pl.col('layout'),
        pl.col('availability'),
        pl.col('power'),
        pl.col('toughness'),
        pl.col('colorIdentity'),
        pl.col('colors'),
        pl.col('types'),
        pl.col('subtypes'),
        pl.col('supertypes'),
        pl.col('manaCost'),
        pl.col('manaValue'),
        pl.col('edhrecRank'),
        pl.col('edhrecSaltiness'),
        pl.col('text'),
        pl.col('flavorText'),
    ])
)

otj_core_cards = otj_core_results.collect()
otj_core_cards.write_parquet(paths["processed"] / f"{SET_CODE}_std_thin.parquet")
otj_core_cards.head()

name,setCode,releaseDate,number,layout,availability,power,toughness,colorIdentity,colors,types,subtypes,supertypes,manaCost,manaValue,edhrecRank,edhrecSaltiness,text,flavorText
str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,f64,f64,str,str
"""Banishing Light""","""BLB""","""2024-08-02""","""001""","""normal""","""arena, mtgo, paper""",,,"""W""","""W""","""Enchantment""","""""","""""","""{2}{W}""",3.0,2267.0,0.21,"""When Banishing Light enters, e…","""""If you cannot find light in t…"
"""Dewdrop Cure""","""BLB""","""2024-08-02""","""010""","""normal""","""arena, mtgo, paper""",,,"""W""","""W""","""Sorcery""","""""","""""","""{2}{W}""",3.0,15834.0,,"""Gift a card (You may promise a…",
"""Maha, Its Feathers Night""","""BLB""","""2024-08-02""","""100""","""normal""","""arena, mtgo, paper""","""6""","""5""","""B""","""B""","""Creature""","""Elemental, Bird""","""Legendary""","""{3}{B}{B}""",5.0,11109.0,,"""Flying, trample\nWard—Discard …","""Its wingspan reaches from dusk…"
"""Moonstone Harbinger""","""BLB""","""2024-08-02""","""101""","""normal""","""arena, mtgo, paper""","""1""","""3""","""B""","""B""","""Creature""","""Bat, Warrior""","""""","""{2}{B}""",3.0,16560.0,,"""Flying, deathtouch\nWhenever y…","""Moonstone weapons drink in the…"
"""Nocturnal Hunger""","""BLB""","""2024-08-02""","""102""","""normal""","""arena, mtgo, paper""",,,"""B""","""B""","""Instant""","""""","""""","""{2}{B}""",3.0,19199.0,,"""Gift a Food (You may promise a…",
