In [None]:
# Import statements
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load the card data into a pandas DataFrame; the cells and helper functions below all read from `df`.
# NOTE(review): the file name suggests Scryfall's "Default Cards" bulk export - confirm the source and download date.
df = pd.read_json('bulk_data_default_cards.json')

# Peek at the first rows to sanity-check the import
df.head()

In [None]:
# Dimensions of the full card table as (rows, columns)
df.shape

In [None]:
# All columns of the DataFrame, i.e. every piece of information provided for a Magic: The Gathering card
df.columns.values

In [None]:
# Kick things off by taking a look at the most recent (as of 20.10.2022) set "Dominaria United" (DMU)
dmu_df = df.loc[df['set'].eq('dmu')]

dmu_df.shape

In [None]:
# As we can see, we end up with a DataFrame containing 434 rows (cards), which matches the set size found on Scryfall.
# "All good so far!" is what I'd like to say, but let's inspect the DataFrame first.
dmu_df[dmu_df['name'] == 'Jodah, the Unifier']

In [None]:
# What's that? We got several copies of the same card! They all have different IDs, though, so are they really the same?
# The thing is, Magic: The Gathering started to print the same card several times within a set (full art, special arts, etc.).
# One way to get rid of these multiples is to keep only the rows whose 'booster' column is true.
dmu_df = dmu_df.loc[dmu_df['booster'].eq(True)]

dmu_df.shape

In [None]:
# With this we're down to 281 unique cards, which is in line with the official set size (can be found here: https://mtg.fandom.com/wiki/Dominaria_United)

In [None]:
# Plot how many cards we got of each color / color combination

# Count once, then reuse the result for both the labels and the bar heights
dmu_color_counts = dmu_df.colors.value_counts()
count_by_color = sns.barplot(x=[''.join(combo) for combo in dmu_color_counts.index],
                             y=dmu_color_counts)

# Turn the tick labels upright so longer color combinations stay readable
for tick_label in count_by_color.get_xticklabels():
    tick_label.set_rotation(90)

In [None]:
# Oops! We really should remove 'Land' type cards when looking at the set (these count as colorless!)
dmu_is_land = dmu_df['type_line'].astype(str).str.contains('Land')
dmu_df_no_lands = dmu_df.loc[~dmu_is_land]

dmu_df_no_lands.shape

In [None]:
# Yep, this checks out! We have 20 Basic Lands and 19 Non-Basic Lands, thus 242 + 20 + 19 = 281, which is the number of cards
# when filtered by set dmu and booster True. So let's plot the set again:
no_land_color_counts = dmu_df_no_lands.colors.value_counts()
count_by_color = sns.barplot(x=[''.join(combo) for combo in no_land_color_counts.index],
                             y=no_land_color_counts)

# Turn the tick labels upright so longer color combinations stay readable
for tick_label in count_by_color.get_xticklabels():
    tick_label.set_rotation(90)

In [None]:
# Wrap this up in a method..
def plot_card_count_by_color(set: str) -> None:
    """
    Draw a bar chart with the number of cards per color combination of a set.

    Reads the global ``df`` and keeps only rows of the given set whose
    ``booster`` flag is true and whose type line does not contain 'Land'.

    set: set code as stored in the ``set`` column, e.g. 'dmu'
    return: None, the chart is drawn as a side effect
    """
    belongs_to_set = df['set'] == set
    in_boosters = df['booster'] == True
    is_land = df['type_line'].astype(str).str.contains('Land')

    color_counts = df[belongs_to_set & in_boosters & ~is_land]['colors'].value_counts()
    # Each index entry is a collection of color letters; glue them into one label
    labels = [''.join(combo) for combo in color_counts.index]

    ax = sns.barplot(x=labels, y=color_counts)
    ax.set(xlabel='Colors', ylabel='Count')
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(90)

In [None]:
# ..and use it for any set you want to look at (here: set code '2x2')
plot_card_count_by_color('2x2')

In [None]:
# Now the question arises, apart from card count by color what else is there to look at in a single set?
# How about card cost by color? Maybe power and toughness of creature type cards?

# Same graph for mana value (converted mana cost) per card:
def plot_card_count_by_mv(set: str) -> None:
    """
    Draw a bar chart with the number of cards per mana value (``cmc``) of a set.

    Reads the global ``df`` and keeps only rows of the given set whose
    ``booster`` flag is true and whose type line does not contain 'Land'.

    set: set code as stored in the ``set`` column, e.g. 'dmu'
    return: None, the chart is drawn as a side effect
    """
    belongs_to_set = df['set'] == set
    in_boosters = df['booster'] == True
    # Lands are left out on purpose: they are cards with a mana value (mv) of 0
    is_land = df['type_line'].astype(str).str.contains('Land')

    mv_counts = df[belongs_to_set & in_boosters & ~is_land]['cmc'].value_counts()

    ax = sns.barplot(x=mv_counts.index, y=mv_counts)
    ax.set(xlabel='Mana Cost', ylabel='Count')
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(90)

In [None]:
# Mana value distribution for the set with code '2x2'
plot_card_count_by_mv('2x2')

In [None]:
# Average mv per color combination of a set
def average_mv_by_color(set: str) -> None:
    """
    Draw a bar chart with the mean mana value (``cmc``) per color combination.

    Reads the global ``df`` and keeps only rows of the given set whose
    ``booster`` flag is true and whose type line does not contain 'Land'.
    The global ``df`` is not modified.

    set: set code as stored in the ``set`` column, e.g. 'dmu'
    return: None, the chart is drawn as a side effect
    """
    is_land = df['type_line'].astype(str).str.contains('Land')
    selected = (df['set'] == set) & (df['booster'] == True) & ~is_land
    set_df = df.loc[selected, ['colors', 'cmc']]

    # Build the labels as a separate Series instead of writing into a slice of `df`
    # (avoids SettingWithCopyWarning). A missing `colors` value (NaN instead of a
    # list) ends up in the same '' bucket as an empty color list instead of
    # crashing ''.join.
    color_labels = set_df['colors'].apply(
        lambda colors: ''.join(colors) if pd.api.types.is_list_like(colors) else ''
    )

    # sort=False keeps the order of first appearance, like Series.unique() would
    mean_mv = set_df['cmc'].groupby(color_labels, sort=False).mean()

    mv_by_color = sns.barplot(x=list(mean_mv.index), y=list(mean_mv.values))
    mv_by_color.set(xlabel='Colors', ylabel='Average Mana Value')
    for item in mv_by_color.get_xticklabels():
        item.set_rotation(90)

In [None]:
# Mean mana value per color combination for the set with code '2x2'
average_mv_by_color('2x2')

In [None]:
def get_set(set_name: str, without_basic_lands: bool = True) -> pd.DataFrame:
    """
    Build a cleaned DataFrame with the cards of a single set.

    Only rows of the global ``df`` whose ``set`` equals ``set_name`` and whose
    ``booster`` flag is true are kept. On the result

    * ``colors`` is replaced by a string in WUBRG order ('C' when the value
      is missing or empty),
    * one boolean column per main card type is added
      (``land``, ``creature``, ``artifact``, ...).

    set_name: set code as stored in the ``set`` column, e.g. 'dmu'
    without_basic_lands: if True (default), cards whose type line contains
        'Basic Land' are removed; if False they are kept
    return: a new pd.DataFrame; the global ``df`` is left untouched
    """
    color_order = 'WUBRG'
    card_types = ['Land', 'Creature', 'Artifact', 'Enchantment', 'Planeswalker', 'Instant', 'Sorcery']

    def color_handling(color_list) -> str:
        # Missing values (NaN/None, e.g. double faced cards) are not list-like.
        # Testing for that is safer than `is np.nan`, which only matches the
        # one np.nan object and not other NaN floats or None.
        if not pd.api.types.is_list_like(color_list) or len(color_list) == 0:
            return 'C'
        return ''.join(sorted(color_list, key=color_order.index))

    # .copy() so the column assignments below work on an independent frame
    # and never raise SettingWithCopyWarning
    selected = (df['set'] == set_name) & (df['booster'] == True)
    set_df = df[selected].copy()

    # Color
    set_df['colors'] = set_df['colors'].apply(color_handling)

    # Type Line - One Hot Encoded
    # Not looking at any subtypes e.g. Equipment, Aura, etc.
    # astype(str) / na=False: a missing type line yields False, never NaN
    type_line = set_df['type_line'].astype(str)
    for card_type in card_types:
        set_df[card_type.lower()] = type_line.str.contains(card_type, regex=False, na=False)

    # Lands
    if not without_basic_lands:
        return set_df

    is_basic_land = type_line.str.contains('Basic Land', regex=False, na=False)
    return set_df[~is_basic_land]

In [None]:
# Rebuild the DMU frame with the helper (color strings, card type flags, basic lands removed by default)
dmu_df = get_set('dmu')

dmu_df.head()

In [None]:
def visualize_set(set_name: str) -> None:
    """
    Draw three bar charts side by side for one set: cards per color
    combination, cards per card type and cards per rarity.

    The cards are taken from ``get_set(set_name)``.

    set_name: set code passed on to ``get_set``, e.g. 'dmu'
    return: None, the figure is drawn as a side effect
    """
    card_types = ['Land', 'Creature', 'Artifact', 'Enchantment', 'Planeswalker', 'Instant', 'Sorcery']
    # Bar color per rarity; the dict order is also the display order
    rarity_colors = {'common': 'Black', 'uncommon': 'Grey', 'rare': 'Gold', 'mythic': 'Orange'}

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    color_ax, type_ax, rarity_ax = axes

    set_df = get_set(set_name)

    # Number of cards per color combination
    color_ax.set_title('Anzahl der Karten pro Farbe')
    cards_per_color = set_df.groupby(['colors'])['colors'].count()
    sns.barplot(ax=color_ax, x=cards_per_color.index, y=cards_per_color.values)
    for item in color_ax.get_xticklabels():
        item.set_rotation(90)

    # Number of cards per card type
    type_ax.set_title('Anzahl der Karten pro Kartentype')
    # Series.sum() skips missing flags instead of turning the whole count into NaN
    cards_per_type = [set_df[card_type.lower()].sum() for card_type in card_types]
    sns.barplot(ax=type_ax, x=card_types, y=cards_per_type)
    for item in type_ax.get_xticklabels():
        item.set_rotation(90)

    # Number of cards per rarity
    rarity_ax.set_title('Anzahl der Karten pro Seltenheit')
    cards_per_rarity = set_df['rarity'].value_counts()
    # Rarities outside the four known ones (e.g. 'special', 'bonus') are appended
    # instead of raising ValueError during sorting
    other_rarities = sorted(set(cards_per_rarity.index) - set(rarity_colors))
    rarity_order = list(rarity_colors) + other_rarities
    # Reindexing fixes the bar order and shows absent rarities as 0, so every
    # rarity always gets its own color no matter which ones occur in the set
    cards_per_rarity = cards_per_rarity.reindex(rarity_order, fill_value=0)
    rarity_palette = [rarity_colors.get(rarity, 'Purple') for rarity in rarity_order]
    sns.barplot(ax=rarity_ax, x=rarity_order, y=cards_per_rarity.values, palette=rarity_palette)

In [None]:
# Color, card type and rarity overview for the set with code 'dmu'
visualize_set('dmu')

In [None]:
def visualize_sets(set_names: list[str]) -> None:
    """
    Draw one row of three bar charts per set: cards per color combination,
    cards per card type and cards per rarity.

    The cards of each set are taken from ``get_set``. The x axes are shared
    per column, so every row uses the same categories in the same order.

    set_names: set codes passed on to ``get_set``, e.g. ['dmu', 'snc']
    return: None, the figure is drawn as a side effect
    raises: ValueError if ``set_names`` is empty
    """
    if not set_names:
        raise ValueError('set_names must contain at least one set code')

    color_order = ['C', 'W', 'U', 'B', 'R', 'G',
                   'WU', 'UB', 'BR', 'RG', 'WG', 'WB', 'UR', 'BG', 'WR', 'UG',
                   'WUB', 'UBR', 'BRG', 'WRG', 'WUG', 'WBG', 'WUR', 'UBG', 'WBR', 'URG',
                   'WUBR', 'UBRG', 'WBRG', 'WURG', 'WUBG',
                   'WUBRG']
    card_types = ['Land', 'Creature', 'Artifact', 'Enchantment', 'Planeswalker', 'Instant', 'Sorcery']
    # Bar color per rarity; the dict order is also the display order
    rarity_colors = {'common': 'Black', 'uncommon': 'Grey', 'rare': 'Gold', 'mythic': 'Orange'}

    set_frames = [get_set(set_name) for set_name in set_names]

    # One rarity list for all rows: with a shared x axis every row must use the
    # same categories. Rarities outside the four known ones (e.g. 'special',
    # 'bonus') are appended instead of raising ValueError during sorting.
    seen_rarities = {rarity for set_df in set_frames for rarity in set_df['rarity'].dropna().unique()}
    rarity_order = list(rarity_colors) + sorted(seen_rarities - set(rarity_colors))
    rarity_palette = [rarity_colors.get(rarity, 'Purple') for rarity in rarity_order]

    # squeeze=False keeps `axes` two-dimensional even when only one set is given
    fig, axes = plt.subplots(len(set_names), 3, figsize=(15, 5 * len(set_names)),
                             sharex='col', squeeze=False)

    axes[0][0].set_title('Anzahl der Karten pro Farbkombination')
    axes[0][1].set_title('Anzahl der Karten pro Kartentype')
    axes[0][2].set_title('Anzahl der Karten pro Seltenheit')

    for (color_ax, type_ax, rarity_ax), set_name, set_df in zip(axes, set_names, set_frames):
        color_ax.set_ylabel(set_name)

        # Number of cards per color combination, 0 for combinations the set lacks
        cards_per_color = set_df['colors'].value_counts().reindex(color_order, fill_value=0)
        sns.barplot(ax=color_ax, x=color_order, y=cards_per_color.values)
        for item in color_ax.get_xticklabels():
            item.set_rotation(90)

        # Number of cards per card type
        # Series.sum() skips missing flags instead of turning the whole count into NaN
        cards_per_type = [set_df[card_type.lower()].sum() for card_type in card_types]
        sns.barplot(ax=type_ax, x=card_types, y=cards_per_type)
        for item in type_ax.get_xticklabels():
            item.set_rotation(90)

        # Number of cards per rarity, 0 for rarities the set lacks
        cards_per_rarity = set_df['rarity'].value_counts().reindex(rarity_order, fill_value=0)
        sns.barplot(ax=rarity_ax, x=rarity_order, y=cards_per_rarity.values, palette=rarity_palette)

In [None]:
# Compare several sets row by row (one row of three charts per set code)
visualize_sets(['dmu', 'snc', 'neo', 'vow', 'mid'])