### This script will combine the cards.feather and decks.feather into one dataframe which can be used for input to a model

In [115]:
import pandas as pd
import numpy as np
import os
import logging
import warnings
from IPython.display import display, HTML


In [116]:
warnings.simplefilter("ignore")
pd.options.display.max_columns = 20000

In [117]:
# Define a custom log handler that writes messages to the notebook output
class NotebookLogHandler(logging.Handler):
    def emit(self, record):
        message = self.format(record)
        display(HTML(f'<p style="color: {record.levelname.lower()}">{message}</p>'))

# Create a logger and set its level to INFO
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Create a formatter and add it to the logger
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler = NotebookLogHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)

#### Read in our cards and deck data

In [118]:
# Get the absolute path of the current working directory
current_dir = os.path.abspath(os.getcwd())

# Get the absolute path of the parent directory
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Read our card database
file_path = os.path.join(parent_dir, 'data', 'mtg_card_database')

card_fp = os.path.join(parent_dir, 'data', 'cards.feather')
deck_fp = os.path.join(parent_dir, 'data', 'decks.feather')

card_df = pd.read_feather(card_fp)
deck_df = pd.read_feather(deck_fp)

deck_df

Unnamed: 0,decks
0,"[1 Unlicensed Hearse, 1 Pithing Needle, 1 Fate..."
1,"[1 Black Sun's Twilight, 2 Reckoner Bankbuster..."
2,"[3 Atraxa, Grand Unifier, 4 Fable of the Mirro..."
3,"[1 Resplendent Marshal, 4 Bishop of Wings, 4 R..."
4,"[4 Tyvar, Jubilant Brawler, 4 Harald Unites th..."
...,...
879,"[1 Mana Crypt, 1 Crucible of Worlds, 1 Black L..."
880,"[1 Tendrils of Agony, 1 Time Walk, 1 Ponder, 1..."
881,"[1 Echoing Truth, 1 Yawgmoth's Will, 1 Time Va..."
882,"[1 Mental Misstep, 1 Ancestral Recall, 1 Vampi..."


In [119]:
def create_io(deck):
    """
        Creates the input and output row from a single deck row in df_decks
        The data should be of format [1 'Atraxa...' 2 'Swamp'], etc
        Data will be output as two single line input and outputs x and y

    Args:
        deck (pd.Series): a single row which we will extract data from

    Returns:
        2x pd.Series: df_merged and df_lands which comprise of the summed card costs, etc. and the number of lands and what colors they produce
    """
    test_deck = deck_df.iloc[2][0]
    test_deck = pd.DataFrame(data={'card+amt': deck_df.iloc[2][0]})

    # Extract the card amount
    def extract_card(row):
        try:
            card = row[1:].strip()
            return card
        except:
            return row

    def extract_card_amt(row):
        try:
            amount_of_card = row[0]
            return amount_of_card
        except:
            return row
    

    test_deck = test_deck.copy()
    test_deck['card'] = test_deck['card+amt'].apply(lambda row: extract_card(row))
    test_deck['amount'] = test_deck['card+amt'].apply(lambda row: extract_card_amt(row))

    test_deck = test_deck.drop(columns='card+amt')


    # We need to get a name from the card database which can be joined with the deck
    # But since there are DFC cards with the format 'name1 // name2' we need to extract name1 to create a valid join
    def handle_dfc(row):
        try:
            return row.split(' // ')[0]
        except:
            return row

    card_df['name2'] = card_df['name'].apply(lambda row: handle_dfc(row))


    # Merge the deck back together with the cards data
    merged_df = pd.merge(left=test_deck, right=card_df, left_on='card', right_on='name2', how='inner')

    # Drop the irrelevant columns
    merged_df = merged_df.drop(columns=[
        'card',
        'name',
        'type_line',
        'name2'
    ])
    merged_df

    card_df[card_df['name'] == 'Fable of the Mirror-Breaker // Reflection of Kiki-Jiki'].name2


    # Extract the lands from the deck into their own dataframe
    land_df = merged_df[merged_df['is_land'] == 1]
    merged_df = merged_df[merged_df['is_land'] == 0]

    num_cards = merged_df['amount'].astype(int).sum()


    # Multiply relevant columns by the amount col
    def multiply_amounts(row):
        try:
            return row * int(row.amount)
        except:
            return row

    merged_df = merged_df.apply(multiply_amounts, axis=1)
    land_df = land_df.apply(multiply_amounts, axis=1)

    # Rename and drop some columns
    col_to_drop = ['amount']
    merged_df = merged_df.drop(columns=col_to_drop)
    land_df = land_df.drop(columns=col_to_drop)
    col_rename = {
        'produces_W' : 'color_sources_W',
        'produces_U' : 'color_sources_U',
        'produces_B' : 'color_sources_B',
        'produces_R' : 'color_sources_R',
        'produces_G' : 'color_sources_G',
        'produces_C' : 'color_sources_C',
        'is_land' : 'number_of_lands',
    }
    land_df = land_df.rename(columns=col_rename)


    def sum_columns_to_single_row(df):
        summed_values = {column: int(df[column].sum()) for column in df.columns}
        return pd.DataFrame([summed_values])

    df_merged = sum_columns_to_single_row(merged_df)
    df_lands = sum_columns_to_single_row(land_df)

    # Convert cmc from a total value to an average
    df_merged = df_merged.rename(columns={'cmc': 'avg_cmc'})
    df_merged['avg_cmc'] = df_merged['avg_cmc'] / num_cards

    # Clean up the output
    df_lands = df_lands.drop(columns=[
        'cmc',
        'cast_cost_W',
        'cast_cost_U',
        'cast_cost_B',
        'cast_cost_R',
        'cast_cost_G',
        'cast_cost_C',
        'cast_cost_P',
        'produces_P',
        'num_x_in_mana_cost',
        'has_looting',
        'has_carddraw',
        'makes_treasure_tokens',
        'reduced_spells',
        'free_spells',
    ])

    return df_merged, df_lands



In [120]:
num_decks = len(deck_df)

input = pd.DataFrame(columns=[
    'avg_cmc', 
    'cast_cost_W',
    'cast_cost_U',
    'cast_cost_B',
    'cast_cost_R',
    'cast_cost_G',
    'cast_cost_C',
    'cast_cost_P',
    'produces_W',
    'produces_U',
    'produces_B',
    'produces_R',
    'produces_G',
    'produces_C',
    'produces_P',
    'num_x_in_mana_cost',
    'has_looting',
    'has_carddraw',
    'makes_treasure_tokens',
    'reduced_spells',
    'free_spells',
    'is_land',
])
output = pd.DataFrame(columns=[
    'color_sources_W',
    'color_sources_U',
    'color_sources_B',
    'color_sources_R',
    'color_sources_G',
    'color_sources_C',
    'number_of_lands',
])

for i in range(num_decks):
    x, y = create_io(deck_df.iloc[i])
    input = input.append(x)
    output = output.append(y)

print(input)
print(output)



     avg_cmc cast_cost_W cast_cost_U cast_cost_B cast_cost_R cast_cost_G  \
0   3.083333           3           3          19          10          14   
0   3.083333           3           3          19          10          14   
0   3.083333           3           3          19          10          14   
0   3.083333           3           3          19          10          14   
0   3.083333           3           3          19          10          14   
..       ...         ...         ...         ...         ...         ...   
0   3.083333           3           3          19          10          14   
0   3.083333           3           3          19          10          14   
0   3.083333           3           3          19          10          14   
0   3.083333           3           3          19          10          14   
0   3.083333           3           3          19          10          14   

   cast_cost_C cast_cost_P produces_W produces_U produces_B produces_R  \
0            

In [121]:
input_fp = os.path.join(parent_dir, 'data', 'input.feather')
output_fp = os.path.join(parent_dir, 'data', 'output.feather')

input.reset_index(inplace=True)
output.reset_index(inplace=True)
input.to_feather(input_fp)
output.to_feather(output_fp)