# Summary

The code to wrangle the data.  Deal with NAs, join data frames, etc.

In [2]:
# Setup Notebook
import os
if os.path.basename(os.getcwd()) != 'mtg-modeling':
    %run -i "../../scripts/notebook_header.py"

Changed working directory to: d:\mtg-modeling


In [3]:
import pandas as pd
import polars as pl

In [25]:
raw_path = Path('data/raw/mtgjson/AllPrintingsParquetFiles')
interim_path = Path('data/interim/mtgjson/AllPrintingsParquetFiles')
processed_path = Path('data/processed/mtgjson/AllPrintingsParquetFiles')

raw_path.exists()
os.makedirs(interim_path, exist_ok=True)
os.makedirs(processed_path, exist_ok=True)

In [34]:
cards = pl.scan_parquet(raw_path / 'cards.parquet')
legalities = pl.scan_parquet(raw_path / 'cardLegalities.parquet')
purchase_urls = pl.scan_parquet(raw_path / 'cardPurchaseUrls.parquet')
sets = pl.scan_parquet(raw_path / 'sets.parquet')

(764, 22)

In [65]:
cards.collect().columns

['artist',
 'artistIds',
 'asciiName',
 'attractionLights',
 'availability',
 'boosterTypes',
 'borderColor',
 'cardParts',
 'colorIdentity',
 'colorIndicator',
 'colors',
 'defense',
 'duelDeck',
 'edhrecRank',
 'edhrecSaltiness',
 'faceConvertedManaCost',
 'faceFlavorName',
 'faceManaValue',
 'faceName',
 'finishes',
 'flavorName',
 'flavorText',
 'frameEffects',
 'frameVersion',
 'hand',
 'hasAlternativeDeckLimit',
 'hasFoil',
 'hasNonFoil',
 'isAlternative',
 'isFullArt',
 'isFunny',
 'isOnlineOnly',
 'isOversized',
 'isPromo',
 'isRebalanced',
 'isReprint',
 'isReserved',
 'isStarter',
 'isStorySpotlight',
 'isTextless',
 'isTimeshifted',
 'keywords',
 'language',
 'layout',
 'leadershipSkills',
 'life',
 'loyalty',
 'manaCost',
 'manaValue',
 'name',
 'number',
 'originalPrintings',
 'originalReleaseDate',
 'originalText',
 'originalType',
 'otherFaceIds',
 'power',
 'printings',
 'promoTypes',
 'rarity',
 'rebalancedPrintings',
 'relatedCards',
 'securityStamp',
 'setCode',
 'si

In [53]:
results = (
    cards
    .join(sets, left_on='setCode', right_on='code', how='left')
    .join(legalities, on='uuid', how='left')
    .join(purchase_urls, on='uuid', how='left')
)

wide_cards = results.collect()
wide_cards.write_parquet(interim_path / 'wide_cards.parquet')
print(wide_cards.shape)

(96216, 127)


## Data for Standard Legal Cards

Used for analysis of the Standard format.  

See the [MTG Wiki Standard/Timeline](https://mtg.fandom.com/wiki/Standard/Timeline) page for validate correct card composition.

In [None]:
results.filter(pl.col)

In [63]:
standard_results = (
    results
    .filter(pl.col('standard') == 'Legal')
    .filter(pl.col('borderColor') == 'black')
    .filter(pl.col('isPromo').is_null())
    # .filter(pl.col('isReprint').is_null())
    .filter(pl.col('promoTypes').is_null())
    .sort(pl.col('name'), descending=False)
    .sort(pl.col('releaseDate'), descending=False)
    .sort(pl.col('power'), descending=False)
)

standard_cards = standard_results.collect()
standard_cards.write_parquet(processed_path / 'standard_cards.parquet')
print(standard_cards.shape)

(6344, 127)


## Outlaws of Thunder Junction Standard Legal Set

Per the [wiki](https://mtg.fandom.com/wiki/Standard/Timeline), there should be 271 cards in the OTJ set.

In [78]:
otj_results = (
    results
    .filter(pl.col('standard') == 'Legal')
    .filter(pl.col('borderColor') == 'black')
    .filter(pl.col('isPromo').is_null())
    # .filter(pl.col('isReprint').is_null())
    .filter(pl.col('promoTypes').is_null())
    .filter(pl.col('setCode') == 'OTJ')
    .sort(pl.col('number'))
)

otj_cards = otj_results.collect()
otj_cards.write_parquet(processed_path / 'OTJSetCards.parquet')
otj_cards_ids = otj_cards.select([pl.col('name'), pl.col('number').str.zfill(3)]).sort(pl.col('number')).to_pandas()

In [79]:
otj_cards_ids.to_csv(processed_path / 'OTJSetCards.csv')

In [73]:
otj_core_results = (
    otj_results
    .select([
        pl.col('name'),
        pl.col('setCode'),
        pl.col('releaseDate'),
        pl.col('number').str.zfill(3),
        pl.col('layout'),
        pl.col('availability'),
        pl.col('power'),
        pl.col('toughness'),
        pl.col('colorIdentity'),
        pl.col('colors'),
        pl.col('types'),
        pl.col('subtypes'),
        pl.col('supertypes'),
        pl.col('manaCost'),
        pl.col('manaValue'),
        pl.col('edhrecRank'),
        pl.col('edhrecSaltiness'),
        pl.col('text'),
        pl.col('flavorText'),
    ])
)

otj_core_cards = otj_core_results.collect()
otj_core_cards.write_parquet(processed_path / 'OTJ_std_core.parquet')
otj_core_cards.head()

name,setCode,releaseDate,number,layout,availability,power,toughness,colorIdentity,colors,types,subtypes,supertypes,manaCost,manaValue,edhrecRank,edhrecSaltiness,text,flavorText
str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,f64,f64,str,str
"""Another Round""","""OTJ""","""2024-04-19""","""001""","""normal""","""arena, mtgo, paper""",,,"""W""","""W""","""Sorcery""","""""","""""","""{X}{X}{2}{W}""",3.0,8795.0,,"""Exile any number of creatures …","""Weary travelers trade stories …"
"""Eriette's Lullaby""","""OTJ""","""2024-04-19""","""010""","""normal""","""arena, mtgo, paper""",,,"""W""","""W""","""Sorcery""","""""","""""","""{1}{W}""",2.0,15295.0,,"""Destroy target tapped creature…","""When Kellan questioned her, Er…"
"""Rattleback Apothecary""","""OTJ""","""2024-04-19""","""100""","""normal""","""arena, mtgo, paper""","""3""","""2""","""B""","""B""","""Creature""","""Gorgon, Warlock""","""""","""{2}{B}""",3.0,13790.0,,"""Deathtouch\nWhenever you commi…","""""Looking for a little liquid c…"
"""Raven of Fell Omens""","""OTJ""","""2024-04-19""","""101""","""normal""","""arena, mtgo, paper""","""1""","""2""","""B""","""B""","""Creature""","""Bird""","""""","""{1}{B}""",2.0,13317.0,,"""Flying\nWhenever you commit a …","""A raven sighted at midnight of…"
"""Rictus Robber""","""OTJ""","""2024-04-19""","""102""","""normal""","""arena, mtgo, paper""","""4""","""3""","""B""","""B""","""Creature""","""Zombie, Rogue""","""""","""{3}{B}""",4.0,19686.0,,"""When Rictus Robber enters, if …",
