# Scryfall Analysis - Cube Structure

__Objective:__ Connect to publicly available data on Scryfall through their REST API to identify patterns set design.

In [1]:
import datetime
print("Script last ran on", datetime.datetime.today().strftime("%m/%d/%Y"))

Script last ran on 06/14/2021


In [11]:
# libraries
import json
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import re
import requests
from selenium import webdriver

In [3]:
# open browser and go to scryfall
driver = webdriver.Chrome()
driver.get("https://scryfall.com/docs/api/bulk-data")

# retrieve the most recent json file containing 'Default Cards'
default_cards_href = driver.find_element_by_css_selector('#main > div > div.reference-doc-content > div > table:nth-child(7) > tbody > tr:nth-child(5) > td:nth-child(2) > a').get_attribute('href')

# retrieve the most recent json file containing 'All Cards'
all_cards_href = driver.find_element_by_css_selector('#main > div > div.reference-doc-content > div > table:nth-child(7) > tbody > tr:nth-child(7) > td:nth-child(2) > a').get_attribute('href')

print("HREF for 'Default Cards' Bulk Dataset:\n\t", default_cards_href, "\n")
print("HREF for 'All Cards' Bulk Dataset:\n\t", all_cards_href)

HREF for 'Default Cards' Bulk Dataset:
	 https://c2.scryfall.com/file/scryfall-bulk/default-cards/default-cards-20210614210334.json 

HREF for 'All Cards' Bulk Dataset:
	 https://c2.scryfall.com/file/scryfall-bulk/all-cards/all-cards-20210614211729.json


In [4]:
driver.quit()
# if you want to read the most recent scryfall data uncomment the following block
# try:
#     df = pd.read_json(default_cards_href)
# except:
#     df = pd.read_json(all_cards_href)

In [21]:
json_path = 'default-cards-20210614210334.json'

df = pd.read_json(json_path)
df.head()
# print("Shape of df (rows, columns): ", df.shape)
# print("Columns Names\n", list(df.columns), "\n")
# df.head()

MemoryError: 

In [None]:
# tidy dataframe
cols = ['name', 'mana_cost', 'cmc', 'type_line', 'oracle_text', 'power', 'toughness', 'colors', 'color_identity', 'keywords', 'set', 'set_name', 'rarity', 'edhrec_rank', 'produced_mana', 'printed_name', 'printed_text', 'legalities', 'lang']
pioneer_sets = ['Strixhaven: School of Mages', 'Kaldheim', 'Zendikar Rising', 'Core 2021', 'Ikoria Lair of Behemoths', 'Theros Beyond Death', 'Throne of Eldraine', 'Core Set 2020', 
                'War of the Spark', 'Ravnica Allegiance', 'Guilds of Ravnica', 'Core Set 2019', 'Dominaria', 'Rivals of Ixalan', 'Ixalan', 'Hour of Devastation', 
                'Amonkhet', 'Aether Revolt', 'Kaladesh', 'Eldritch Moon', 'Shadows over Innistrad', 'Oath of the Gatewatch', 'Battle for Zendikar', 'Magic Origins', 
                'Dragons of Tarkir', 'Fate Reforged', 'Khans of Tarkir', 'Magic 2015', 'Journey into Nyx', 'Born of the Gods', 'Theros', 'Magic 2014', 
                "Dragon's Maze", 'Gatecrash', 'Return to Ravnica']

pioneer = df[cols]

#pioneer = pioneer[pioneer['legalities'].astype(str).str.contains("'pioneer': 'legal'")] # take only cards from pioneer onwards
pioneer = pioneer[pioneer['set_name'].isin(pioneer_sets)]
pioneer = pioneer[(pioneer['rarity'] == 'common') & (~pioneer['type_line'].str.contains('Land')) &
                 (pioneer['lang'] == 'en')] # filter for commons that aren't lands

# evasion
evasion_keywords = ['Flying', 'Trample', 'Menace',
                    'Plainswalk', 'Islandwalk', 'Forestwalk', 'Mountainwalk', 'Swampwalk',
                    'Skulk', 'Shadow', 'Fear', 'Intimidate']
pioneer['evasion'] = pioneer['keywords'].apply(lambda x: bool(any(item in x for item in evasion_keywords)))

# removal
## need to figure out a way to apply this re to create a boolean column
removal_re = re.compile(r"""
                        (
                          enchanted (creature|permanent) can\'t attack (?!until end of turn)|
                          exile target (?!card)|
                          enchanted creature doesn\'t untap|
                          return target (creature|nonland permanent) to its owner\'s hand|
                          put target (creature|permanent)|
                          destroy target|
                          target (player|opponent) sacrifice|
                          (\+|-)[0-9]*/-[1-9]* until end of turn|
                          deals [0-9]* damage|
                          deals damage equal|
                          target creature .* control(s)? fight
                        )
                        """,
                        re.VERBOSE | re.IGNORECASE)

pioneer['removal'] = pioneer.oracle_text.astype(str).apply(lambda x: bool(test_re.search(x.lower())))
pioneer[(pioneer['removal'] == True) & ~(pioneer['type_line'].str.contains('Creature'))][:100].oracle_text.tolist()


## Summary Statistics by Set

In [None]:
card_type_breakdown = pioneer.groupby('set_name').size().reset_index(name = 'commons') # number of commons

card_types = ['Creature', 'Enchantment', 'Instant', 'Sorcery', 'Artifact']
for card_type in card_types:
    new_df = pioneer[pioneer['type_line'].str.contains(card_type)].groupby('set_name').size().reset_index(name = card_type + "s")
    card_type_breakdown = card_type_breakdown.merge(new_df, how = 'left', on = 'set_name')

print('\nCard Type Breakdown\n')
card_type_breakdown = card_type_breakdown.replace(np.nan, 0)
card_type_breakdown

In [None]:
# for each card type, create a boxplot chart where x is set and y is card ratio

## White

In [None]:
white = pioneer[pioneer['color_identity'].astype(str).str.contains("W")]
card_type_breakdown_w = white.groupby('set_name').size().reset_index(name = 'commons') # number of commons

card_types = ['Creature', 'Enchantment', 'Instant', 'Sorcery', 'Artifact']
for card_type in card_types:
    new_df = white[white['type_line'].str.contains(card_type)].groupby('set_name').size().reset_index(name = card_type + "s")
    card_type_breakdown_w = card_type_breakdown_w.merge(new_df, how = 'left', on = 'set_name')

print('\nCard Type Breakdown For White\n')
card_type_breakdown_w = card_type_breakdown_w.replace(np.nan, 0).set_index('set_name')
ctypes_ratio_w = card_type_breakdown_w.iloc[:, 1:].div(card_type_breakdown_w.commons, axis = 0)
ratio_sums = [row.Creatures + row.Enchantments + row.Instants + row.Sorcerys + row.Artifacts for index, row in ctypes_ratio_w.iterrows()]
ctypes_ratio_w['ratio_sums'] = ratio_sums
ctypes_ratio_w.head()

In [None]:
# cart_type_w_long = pd.melt(card_type_breakdown_w, value_vars = ['Creatures', 'Enchantments', 'Instants', 'Sorcerys', 'Artifacts'],  var_name = 'card_type')

### Creatures

#### Average Curve

In [None]:
# white creatures df
white_creatures = white[white['type_line'].str.contains('Creature')]
creature_curve_df_w = white_creatures.groupby(['set_name', 'cmc']).size().reset_index(name = 'cards')
creature_curve_df_w = creature_curve_df_w.groupby('cmc').cards.agg(['mean', 'min', 'max', 'std']).reset_index()
creature_curve_df_w['count_ratio'] = creature_curve_df_w['mean'] / creature_curve_df_w['mean'].sum()

# white creature curve
plt.figure(figsize = (8,5))
plt.bar(x = creature_curve_df_w['cmc'], height = creature_curve_df_w['mean'], yerr = creature_curve_df_w['std'],
        color = 'ivory', edgecolor = 'black')
plt.ylabel('Average Card Count')
plt.xlabel('Mana Value')
plt.title('White: Average Creature Curve');

In [None]:
creature_ratio_w = ctypes_ratio_w['Creatures'].mean()
print("Average percent of white cards which are creatures:", creature_ratio_w, "\n")

for index, row in creature_curve_df_w.iterrows():
  print("\tRatio of", row['cmc'], "drops:", row['count_ratio'])

#### Creatures with Evasion

In [None]:
print("Ratio of white creatures with evasion:\t",
      len(white_creatures[white_creatures['evasion'] == True]) / len(white_creatures))

### Non-Creatures

## Blue

In [None]:
blue = pioneer[pioneer['color_identity'].astype(str).str.contains("U")]
card_type_breakdown_u = blue.groupby('set_name').size().reset_index(name = 'commons') # number of commons

card_types = ['Creature', 'Enchantment', 'Instant', 'Sorcery', 'Artifact']
for card_type in card_types:
    new_df = blue[blue['type_line'].str.contains(card_type)].groupby('set_name').size().reset_index(name = card_type + "s")
    card_type_breakdown_u = card_type_breakdown_u.merge(new_df, how = 'left', on = 'set_name')

print('\nCard Type Breakdown For Blue\n')
card_type_breakdown_u = card_type_breakdown_u.replace(np.nan, 0).set_index('set_name')
ctypes_ratio_u = card_type_breakdown_u.iloc[:, 1:].div(card_type_breakdown_u.commons, axis = 0)
ratio_sums = [row.Creatures + row.Enchantments + row.Instants + row.Sorcerys + row.Artifacts for index, row in ctypes_ratio_u.iterrows()]
ctypes_ratio_u['ratio_sums'] = ratio_sums
ctypes_ratio_u.head()

### Creatures

In [None]:
# blue creatures df
blue_creatures = blue[blue['type_line'].str.contains('Creature')]
creature_curve_df_u = blue_creatures.groupby(['set_name', 'cmc']).size().reset_index(name = 'cards')
creature_curve_df_u = creature_curve_df_u.groupby('cmc').cards.agg(['mean', 'min', 'max', 'std']).reset_index()
creature_curve_df_u['count_ratio'] = creature_curve_df_u['mean'] / creature_curve_df_u['mean'].sum()

# blue creature curve
plt.figure(figsize = (8,5))
plt.bar(x = creature_curve_df_u['cmc'], height = creature_curve_df_u['mean'], yerr = creature_curve_df_u['std'],
        color = 'dodgerblue', edgecolor = 'black')
plt.ylabel('Average Card Count')
plt.xlabel('Mana Value')
plt.title('Blue: Average Creature Curve');

In [None]:
creature_ratio_u = ctypes_ratio_u['Creatures'].mean()
print("Average percent of blue cards which are creatures:", creature_ratio_u, "\n")

for index, row in creature_curve_df_u.iterrows():
  print("\tRatio of", row['cmc'], "drops:", row['count_ratio'])

## Black

In [None]:
black = pioneer[pioneer['color_identity'].astype(str).str.contains("B")]
card_type_breakdown_b = black.groupby('set_name').size().reset_index(name = 'commons') # number of commons

card_types = ['Creature', 'Enchantment', 'Instant', 'Sorcery', 'Artifact']
for card_type in card_types:
    new_df = black[black['type_line'].str.contains(card_type)].groupby('set_name').size().reset_index(name = card_type + "s")
    card_type_breakdown_b = card_type_breakdown_b.merge(new_df, how = 'left', on = 'set_name')

print('\nCard Type Breakdown For Black\n')
card_type_breakdown_b = card_type_breakdown_b.replace(np.nan, 0).set_index('set_name')
ctypes_ratio_b = card_type_breakdown_b.iloc[:, 1:].div(card_type_breakdown_b.commons, axis = 0)
ratio_sums = [row.Creatures + row.Enchantments + row.Instants + row.Sorcerys + row.Artifacts for index, row in ctypes_ratio_u.iterrows()]
ctypes_ratio_b['ratio_sums'] = ratio_sums
ctypes_ratio_b.head()

### Creatures

In [None]:
# black creatures df
black_creatures = black[black['type_line'].str.contains('Creature')]
creature_curve_df_b = black_creatures.groupby(['set_name', 'cmc']).size().reset_index(name = 'cards')
creature_curve_df_b = creature_curve_df_b.groupby('cmc').cards.agg(['mean', 'min', 'max', 'std']).reset_index()
creature_curve_df_b['count_ratio'] = creature_curve_df_b['mean'] / creature_curve_df_b['mean'].sum()

# black creature curve
plt.figure(figsize = (8,5))
plt.bar(x = creature_curve_df_b['cmc'], height = creature_curve_df_b['mean'], yerr = creature_curve_df_b['std'],
        color = 'dimgray', edgecolor = 'black')
plt.ylabel('Average Card Count')
plt.xlabel('Mana Value')
plt.title('Black: Average Creature Curve');

In [None]:
creature_ratio_b = ctypes_ratio_b['Creatures'].mean()
print("Average percent of black cards which are creatures:", creature_ratio_b, "\n")

for index, row in creature_curve_df_b.iterrows():
  print("\tRatio of", row['cmc'], "drops:", row['count_ratio'])

## Red

In [None]:
red = pioneer[pioneer['color_identity'].astype(str).str.contains("R")]
card_type_breakdown_r = red.groupby('set_name').size().reset_index(name = 'commons') # number of commons

card_types = ['Creature', 'Enchantment', 'Instant', 'Sorcery', 'Artifact']
for card_type in card_types:
    new_df = red[red['type_line'].str.contains(card_type)].groupby('set_name').size().reset_index(name = card_type + "s")
    card_type_breakdown_r = card_type_breakdown_r.merge(new_df, how = 'left', on = 'set_name')

print('\nCard Type Breakdown For Red\n')
card_type_breakdown_r = card_type_breakdown_r.replace(np.nan, 0).set_index('set_name')
ctypes_ratio_r = card_type_breakdown_r.iloc[:, 1:].div(card_type_breakdown_r.commons, axis = 0)
ratio_sums = [row.Creatures + row.Enchantments + row.Instants + row.Sorcerys + row.Artifacts for index, row in ctypes_ratio_u.iterrows()]
ctypes_ratio_r['ratio_sums'] = ratio_sums
ctypes_ratio_r.head()

### Creatures

In [None]:
# red creatures df
red_creatures = red[red['type_line'].str.contains('Creature')]
creature_curve_df_r = red_creatures.groupby(['set_name', 'cmc']).size().reset_index(name = 'cards')
creature_curve_df_r = creature_curve_df_r.groupby('cmc').cards.agg(['mean', 'min', 'max', 'std']).reset_index()
creature_curve_df_r['count_ratio'] = creature_curve_df_r['mean'] / creature_curve_df_r['mean'].sum()

# red creature curve
plt.figure(figsize = (8,5))
plt.bar(x = creature_curve_df_r['cmc'], height = creature_curve_df_r['mean'], yerr = creature_curve_df_r['std'],
        color = 'crimson', edgecolor = 'black')
plt.ylabel('Average Card Count')
plt.xlabel('Mana Value')
plt.title('Red: Average Creature Curve');

In [None]:
creature_ratio_r = ctypes_ratio_r['Creatures'].mean()
print("Average percent of red cards which are creatures:", creature_ratio_r, "\n")

for index, row in creature_curve_df_r.iterrows():
  print("\tRatio of", row['cmc'], "drops:", row['count_ratio'])

## Green

In [None]:
green = pioneer[pioneer['color_identity'].astype(str).str.contains("G")]
card_type_breakdown_g = green.groupby('set_name').size().reset_index(name = 'commons') # number of commons

card_types = ['Creature', 'Enchantment', 'Instant', 'Sorcery', 'Artifact']
for card_type in card_types:
    new_df = green[green['type_line'].str.contains(card_type)].groupby('set_name').size().reset_index(name = card_type + "s")
    card_type_breakdown_g = card_type_breakdown_g.merge(new_df, how = 'left', on = 'set_name')

print('\nCard Type Breakdown For Green\n')
card_type_breakdown_g = card_type_breakdown_g.replace(np.nan, 0).set_index('set_name')
ctypes_ratio_g = card_type_breakdown_g.iloc[:, 1:].div(card_type_breakdown_g.commons, axis = 0)
ratio_sums = [row.Creatures + row.Enchantments + row.Instants + row.Sorcerys + row.Artifacts for index, row in ctypes_ratio_u.iterrows()]
ctypes_ratio_g['ratio_sums'] = ratio_sums
ctypes_ratio_g.head()

### Creatures

In [None]:
# green creatures df
green_creatures = green[green['type_line'].str.contains('Creature')]
creature_curve_df_g = green_creatures.groupby(['set_name', 'cmc']).size().reset_index(name = 'cards')
creature_curve_df_g = creature_curve_df_g.groupby('cmc').cards.agg(['mean', 'min', 'max', 'std']).reset_index()
creature_curve_df_g['count_ratio'] = creature_curve_df_g['mean'] / creature_curve_df_g['mean'].sum()

# green creature curve
plt.figure(figsize = (8,5))
plt.bar(x = creature_curve_df_g['cmc'], height = creature_curve_df_g['mean'], yerr = creature_curve_df_g['std'],
        color = 'forestgreen', edgecolor = 'black')
plt.ylabel('Average Card Count')
plt.xlabel('Mana Value')
plt.title('Green: Average Creature Curve');

In [None]:
creature_ratio_g = ctypes_ratio_g['Creatures'].mean()
print("Average percent of green cards which are creatures:", creature_ratio_g, "\n")

for index, row in creature_curve_df_g.iterrows():
  print("\tRatio of", row['cmc'], "drops:", row['count_ratio'])