# Data Sources

There are some good sources of data for MTG:

https://scryfall.com/

https://mtgjson.com/

https://github.com/taw/magic-search-engine/tree/master

## Magic Search Engine

The Magic Search Engine by [taw](https://github.com/taw) has compiled card, deck, and booster data.

Let's clone the repo then copy the data directory.

Note that this repo is pretty large at 122 MB. We only do a shallow clone (`--depth 1`), because a full clone with its history is much larger.

In [6]:
import os
import json
import requests
import gzip
import shutil
import pathlib
from collections import defaultdict

In [7]:
!git config --global http.postBuffer 1048576000
os.chdir(os.path.expanduser('~/'))
!git clone  --depth 1 https://github.com/taw/magic-search-engine.git #shallower clone
os.chdir('magic-search-engine')
!cp -r data/* ~/mtg-modeling/data/magic-search-engine
os.chdir(os.path.expanduser("~/mtg-modeling"))

Let's open one set's JSON file and explore it.

In [12]:
# Parse the OTJ set file (copied from the magic-search-engine repo) into `data`.
filepath = 'data/magic-search-engine/sets/OTJ.json'
data = json.loads(pathlib.Path(filepath).read_text(encoding='utf-8'))

In [14]:
data.keys()

dict_keys(['baseSetSize', 'block', 'cards', 'cardsphereSetId', 'code', 'isFoilOnly', 'isOnlineOnly', 'keyruneCode', 'languages', 'mcmId', 'mcmIdExtras', 'mcmName', 'mtgoCode', 'name', 'releaseDate', 'sealedProduct', 'tcgplayerGroupId', 'tokenSetCode', 'tokens', 'totalSetSize', 'translations', 'type', 'meta'])

In [29]:
data['cards'][0]

{'artist': 'Darrell Riche',
 'artistIds': ['262c8e55-4efc-467b-a042-6f734b9d2e01'],
 'availability': ['arena', 'mtgo', 'paper'],
 'boosterTypes': ['default'],
 'borderColor': 'black',
 'colorIdentity': ['W'],
 'colors': ['W'],
 'convertedManaCost': 3.0,
 'finishes': ['nonfoil', 'foil'],
 'flavorText': 'Weary travelers trade stories of the Eversaloon, an extraplanar respite that appears at the moment it is most needed.',
 'foreignData': [{'flavorText': 'Müde Reisende erzählen sich Geschichten vom Saloon der Ewigkeit, einem außerweltlichen Ort zur Rast, der Verirrten erscheint, wenn ihre Not am größten ist.',
   'identifiers': {'multiverseId': 655206,
    'scryfallId': '3fdab8b0-7a42-4892-bbac-5f8738c35e4f'},
   'language': 'German',
   'multiverseId': 655206,
   'name': 'Noch eine Runde',
   'text': 'Schicke eine beliebige Anzahl an Kreaturen, die du kontrollierst, ins Exil und bringe sie dann unter der Kontrolle ihres Besitzers ins Spiel zurück. Wiederhole dann diesen Vorgang X weitere

In [39]:
# Count, for every top-level field name, how many cards in the set define it.
# Fields with a count below len(cards) are optional (absent on some cards).
cards = data['cards']
card_keys = defaultdict(int)

for card in cards:
    # Iterating a dict yields its keys; the values are not needed for the tally.
    for k in card:
        card_keys[k] += 1

card_keys

defaultdict(int,
            {'artist': 374,
             'artistIds': 374,
             'availability': 374,
             'boosterTypes': 276,
             'borderColor': 374,
             'colorIdentity': 374,
             'colors': 374,
             'convertedManaCost': 374,
             'finishes': 374,
             'flavorText': 175,
             'foreignData': 374,
             'frameVersion': 374,
             'hasFoil': 374,
             'hasNonFoil': 374,
             'identifiers': 374,
             'language': 374,
             'layout': 374,
             'legalities': 374,
             'manaCost': 334,
             'manaValue': 374,
             'name': 374,
             'number': 374,
             'originalText': 359,
             'originalType': 374,
             'purchaseUrls': 374,
             'rarity': 374,
             'rulings': 290,
             'securityStamp': 168,
             'setCode': 374,
             'sourceProducts': 368,
             'subtypes': 374,
    

In [48]:
def count_keys(term, card_list=None):
    """Count how often each value of the field ``term`` occurs across cards.

    Despite the name, this tallies *values*, not keys: for every card that
    has ``term``, a list value contributes one count per element and any
    other value contributes a single count. Cards without ``term`` are
    skipped, so absent fields never show up in the result.

    Args:
        term: Name of the card field to tally (e.g. ``'rarity'``).
        card_list: Iterable of card dicts to scan. Defaults to the
            notebook-level ``cards`` list when omitted, which keeps existing
            ``count_keys(term=...)`` calls working unchanged.

    Returns:
        A ``defaultdict(int)`` mapping each observed value to its count.

    Raises:
        TypeError: If a counted value is unhashable (e.g. a dict, or a list
            of dicts), since values are used as dictionary keys.
    """
    source = cards if card_list is None else card_list
    val_counts = defaultdict(int)

    for card in source:
        if term not in card:
            continue
        value = card[term]
        # Treat scalars as one-element lists so both shapes share one loop.
        entries = value if isinstance(value, list) else [value]
        for entry in entries:
            val_counts[entry] += 1

    return val_counts

In [51]:
count_keys(term='keywords')

defaultdict(int,
            {'Flying': 40,
             'Ward': 8,
             'Flash': 13,
             'Plot': 51,
             'Saddle': 24,
             'Vigilance': 17,
             'Spree': 28,
             'Scry': 5,
             'Lifelink': 11,
             'Equip': 8,
             'Mill': 8,
             'Surveil': 9,
             'Enchant': 3,
             'Menace': 7,
             'Treasure': 20,
             'Deathtouch': 7,
             'Haste': 15,
             'Reach': 13,
             'Affinity': 1,
             'Trample': 18,
             'First strike': 3,
             'Protection': 1,
             'Defender': 2,
             'Landfall': 2,
             'Prowess': 2,
             'Investigate': 2,
             'Crew': 2})

In [50]:
count_keys(term='isReprint')

defaultdict(int, {True: 34})

In [52]:
count_keys(term='colorIdentity')

defaultdict(int, {'W': 94, 'R': 100, 'U': 97, 'B': 94, 'G': 99})

In [53]:
count_keys(term='rarity')

defaultdict(int, {'rare': 122, 'mythic': 41, 'common': 106, 'uncommon': 105})

In [54]:
count_keys(term='manaValue')

defaultdict(int,
            {3.0: 95,
             4.0: 54,
             1.0: 41,
             6.0: 11,
             2.0: 98,
             5.0: 34,
             7.0: 1,
             0.0: 40})

In [56]:
count_keys(term='types')

defaultdict(int,
            {'Sorcery': 39,
             'Creature': 214,
             'Instant': 39,
             'Enchantment': 25,
             'Artifact': 17,
             'Planeswalker': 5,
             'Land': 40})

In [57]:
count_keys(term='supertypes')

defaultdict(int, {'Legendary': 96, 'Basic': 15})

In [58]:
count_keys(term='subtypes')

defaultdict(int,
            {'Angel': 3,
             'Armadillo': 2,
             'Bird': 9,
             'Rogue': 44,
             'Cat': 4,
             'Beast': 6,
             'Mount': 24,
             'Sheep': 1,
             'Rabbit': 2,
             'Mercenary': 20,
             'Spirit': 2,
             'Human': 72,
             'Scout': 9,
             'Ox': 1,
             'Dwarf': 4,
             'Artificer': 1,
             'Faerie': 3,
             'Squirrel': 1,
             'Noble': 2,
             'Pegasus': 1,
             'Knight': 2,
             'Soldier': 4,
             'Equipment': 8,
             'Citizen': 1,
             'Griffin': 1,
             'Salamander': 2,
             'Crab': 1,
             'Turtle': 1,
             'Homarid': 1,
             'Djinn': 1,
             'Advisor': 5,
             'Homunculus': 2,
             'Warlock': 18,
             'Drake': 1,
             'Shark': 1,
             'Sphinx': 3,
             'Wizard': 5,
          

In [61]:
count_keys(term='boosterTypes')

defaultdict(int, {'default': 276})

Let's take the JSON data and save the relevant fields for predictive modeling.

In [None]:
# Work in progress: presumably the card fields to keep (or drop) when exporting
# data for modeling -- TODO confirm intent and finish the list.
filter_keys = [
    'availability',
    'boosterTypes',
]

# NOTE(review): looks like an unfinished placeholder lookup. Because card_keys
# is a defaultdict(int), indexing it with '' inserts a '' entry with count 0.
card_keys[
    ''
]

In [10]:
# MTGJSON's gzip-compressed SQLite dump, and where to store it locally
# (compressed archive and decompressed database).
# NOTE(review): these paths are relative to the current working directory and
# use '../data', while the set JSON above is read from 'data/...' -- confirm
# which directory this cell is expected to run from.
url = "https://mtgjson.com/api/v5/AllPrintings.sqlite.gz"
download_path = "../data/AllPrintings.sqlite.gz"
unzip_path = "../data/AllPrintings.sqlite"


In [4]:

# Open the download as a stream so the body is not fetched until it is read.
# Without a timeout, requests can block forever on a stalled connection; the
# value bounds connection setup and each wait for data, not the total transfer.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()  # Ensure we notice bad responses


In [11]:
# Make sure the directories that will hold the archive and the extracted
# database exist. exist_ok=True makes the call idempotent and avoids the
# check-then-create race of testing .exists() first.
for target_path in (download_path, unzip_path):
    parent_dir = pathlib.Path(target_path).parent
    parent_dir.mkdir(parents=True, exist_ok=True)

In [5]:
# Write the archive to disk in 1 MiB chunks. Reading `response.content` would
# buffer the entire (large) file in memory, defeating the streamed request.
with open(download_path, "wb") as f:
    for chunk in response.iter_content(chunk_size=1024 * 1024):
        f.write(chunk)

In [15]:
# Decompress the downloaded .gz archive into the SQLite file, streaming the
# bytes from one file object to the other.
with gzip.open(download_path, "rb") as compressed, open(unzip_path, "wb") as extracted:
    shutil.copyfileobj(compressed, extracted)