## Mana-fest Destiny

In [2]:
#Imports
import os
import pandas as pd
import requests

### Load/Download and Preclean Data

In [None]:
# Set file paths
DATA_DIR = "data/"
DRAFT_CSV = os.path.join(DATA_DIR, "cleaned_data.csv")
SCRYFALL_CSV = os.path.join(DATA_DIR, "bloomburrow_cleaned.csv")

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)

# Function to load draft data (skip download if it exists)
def load_draft_data():
    if os.path.exists(DRAFT_CSV):
        print("Loading draft data from existing file...")
        return pd.read_csv(DRAFT_CSV)
    else:
        print("Draft file missing. ERROR. CANNOT PROCEED. SELF DESTRUCT IN 5, 4, 3, 2, 1")
        assert(False)


Loading draft data from existing file...
Loading Scryfall data from existing file...


### Combine Card/Deck Data, Basic Count Analysis

In [10]:
draft_df.columns

Index(['draft_id', 'draft_ti fome', 'game_time', 'build_index', 'match_number',
       'game_number', 'rank', 'opp_rank', 'main_colors', 'splash_colors',
       ...
       'tutored_Ygra, Eater of All', 'deck_Ygra, Eater of All',
       'sideboard_Ygra, Eater of All', 'opening_hand_Zoraline, Cosmos Caller',
       'drawn_Zoraline, Cosmos Caller', 'tutored_Zoraline, Cosmos Caller',
       'deck_Zoraline, Cosmos Caller', 'sideboard_Zoraline, Cosmos Caller',
       'user_n_games_bucket', 'user_game_win_rate_bucket'],
      dtype='object', length=1398)

In [14]:
# Step 4: Extract Card Types
def get_card_types(card_df: pd.DataFrame) -> set:
    """Finds all distinct card types in the dataset."""
    return {type_line.split()[0] for type_line in card_df["type_line"]}

# Step 5: Compute Deck Data
def generate_deck_data(draft_df: pd.DataFrame, card_df: pd.DataFrame, output_file: str, max_decks: int = None) -> None:
    """Aggregates deck performance data with an optional limit and saves it to a CSV file."""
    card_types = get_card_types(card_df)
    #deck_columns = ["deck_id", "wins", "losses", "avg_mana_curve", "bomb_density", "color_identity"] + [f"num_{ctype.lower()}" for ctype in card_types]
    
    # Draft id from that DF just becomes "deck_id" in the final
    grouped = draft_df.groupby("draft_id")
    deck_columns = [col for col in draft_df.columns if col.startswith("deck_")]
    deck_df = pd.DataFrame(columns=deck_columns)

    for i, (deck_id, group) in enumerate(grouped):
        if max_decks and i >= max_decks:
            break  # Stop early if max_decks is reached
        
        # Get list of card names from relevant columns
        deck_list = [col.replace("deck_", "") for col in deck_columns if group[col].sum() > 0]

        wins, losses = group["wins"].sum(), group["losses"].sum()

        non_land_cards = [card for card in deck_list if "Land" not in card_df.loc[card, "type_line"]]
        avg_mana_curve = sum(card_df.loc[card, "cmc"] for card in non_land_cards if card in card_df.index) / len(non_land_cards) if non_land_cards else 0
        bomb_density = sum(1 for card in deck_list if card_df.loc[card, "rarity"] in ["rare", "mythic"]) / len(deck_list)
        color_identity = list(set(color for card in deck_list for color in card_df.loc[card, "color_identity"]))
        type_counts = {f"num_{ctype.lower()}": sum(1 for card in deck_list if ctype in card_df.loc[card, "type_line"]) for ctype in card_types}

        deck_df.loc[len(deck_df)] = {**{"deck_id": deck_id, "wins": wins, "losses": losses, "avg_mana_curve": avg_mana_curve, "bomb_density": bomb_density, "color_identity": color_identity}, **type_counts}
    deck_df.to_csv(output_file, index=False)
    print(f"Deck DataFrame created: {deck_df.shape[0]} rows, {deck_df.shape[1]} columns (Processed up to {max_decks} decks)")


generate_deck_data(draft_df, card_df, "first_analysis.csv", 2)


KeyError: 'wins'

### 5/30 parquet and other work


In [None]:


# Load your dataset (assuming it's a CSV)
csv_file = "data/bloomburrow/games.csv"
df = pd.read_csv(csv_file)

# Save as Parquet (PyArrow format)
parquet_file = "data/bloomburrow/games.parquet"
df.to_parquet(parquet_file, engine="pyarrow", compression="snappy")  # Snappy is fast & efficient

print(f"Converted {csv_file} to {parquet_file}.")


Converted data/bloomburrow/games.csv to data/bloomburrow/games.parquet.


In [None]:
# Load the Parquet file we just created
parquet_filestr = "data/bloomburrow/games.parquet"

if "games_df" not in globals():
    games_df = pd.read_parquet(parquet_filestr, engine="pyarrow")
cards_df = pd.read_csv("data/bloomburrow/cards.csv")


In [None]:
# Demonstrate columnar efficiency by reading only a specific column

parquet_file = "data/bloomburrow/games.parquet"
df = pd.read_parquet(parquet_file, columns=["draft_id"], engine="pyarrow")

# Quick check: Print a few rows
print(df.head())

# Verify column type and memory usage
print(df.info())

                           draft_id
0  deaa4cdcd3e84d8e8b5a0ea34a0f9d79
1  deaa4cdcd3e84d8e8b5a0ea34a0f9d79
2  deaa4cdcd3e84d8e8b5a0ea34a0f9d79
3  deaa4cdcd3e84d8e8b5a0ea34a0f9d79
4  deaa4cdcd3e84d8e8b5a0ea34a0f9d79
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 931230 entries, 0 to 931229
Data columns (total 1 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   draft_id  931230 non-null  object
dtypes: object(1)
memory usage: 7.1+ MB
None


In [11]:
cards_df.columns

Index(['object', 'id', 'oracle_id', 'multiverse_ids', 'mtgo_id', 'arena_id',
       'tcgplayer_id', 'cardmarket_id', 'name', 'lang', 'released_at', 'uri',
       'scryfall_uri', 'layout', 'highres_image', 'image_status', 'image_uris',
       'mana_cost', 'cmc', 'type_line', 'oracle_text', 'colors',
       'color_identity', 'keywords', 'legalities', 'games', 'reserved',
       'game_changer', 'foil', 'nonfoil', 'finishes', 'oversized', 'promo',
       'reprint', 'variation', 'set_id', 'set', 'set_name', 'set_type',
       'set_uri', 'set_search_uri', 'scryfall_set_uri', 'rulings_uri',
       'prints_search_uri', 'collector_number', 'digital', 'rarity',
       'flavor_text', 'card_back_id', 'artist', 'artist_ids',
       'illustration_id', 'border_color', 'frame', 'full_art', 'textless',
       'booster', 'story_spotlight', 'edhrec_rank', 'penny_rank', 'preview',
       'prices', 'related_uris', 'purchase_uris', 'power', 'toughness',
       'all_parts', 'security_stamp', 'promo_types', '

In [None]:
decklist_raw = analyze_first_full_draft(games_df)
print("\n--- Raw Decklist ---")
print(decklist_raw)


Analyzing data for draft_id: deaa4cdcd3e84d8e8b5a0ea34a0f9d79

--- Draft Performance for deaa4cdcd3e84d8e8b5a0ea34a0f9d79 ---
Event Record: 6-3
Win Rate: 66.67%

--- Raw Decklist ---
{'Bakersbane Duo': 1, 'Cache Grab': 2, 'Camellia, the Seedmiser': 1, 'Cindering Cutthroat': 1, 'Daggerfang Duo': 1, 'Downwind Ambusher': 1, 'Druid of the Spade': 1, 'Forest': 9, 'Glidedive Duo': 1, 'Hazardroot Herbalist': 1, "Hazel's Nocturne": 1, 'Head of the Homestead': 1, 'Longstalk Brawl': 1, 'Overprotect': 1, 'Polliwallop': 1, 'Savor': 2, 'Swamp': 8, 'Tangle Tumbler': 1, 'Tender Wildguide': 1, 'Thought-Stalker Warlock': 1, 'Three Tree Rootweaver': 2, 'Ygra, Eater of All': 1}


In [18]:
# Convert decklist dictionary to DataFrame
decklist_df = pd.DataFrame(decklist_raw.items(), columns=["name", "count"])

# Merge with Scryfall card data
decklist_enriched = decklist_df.merge(cards_df[["name", "cmc", "type_line"]], on="name", how="left")

# Compute basic deck stats
avg_mana_value = (decklist_enriched["cmc"] * decklist_enriched["count"]).sum() / decklist_enriched["count"].sum()
num_creatures = decklist_enriched[decklist_enriched["type_line"].str.contains("Creature", na=False)]["count"].sum()

# Display enriched decklist with stats
print(f"\n--- Enhanced Deck Analysis ---")
print(f"Average Mana Value: {avg_mana_value:.2f}")
print(f"Total Creatures: {num_creatures}")
print(decklist_enriched[["name", "count", "cmc", "type_line"]])



--- Enhanced Deck Analysis ---
Average Mana Value: 1.68
Total Creatures: 14
                       name  count  cmc                              type_line
0            Bakersbane Duo      1  2.0            Creature — Squirrel Raccoon
1                Cache Grab      2  2.0                                Instant
2   Camellia, the Seedmiser      1  3.0  Legendary Creature — Squirrel Warlock
3       Cindering Cutthroat      1  3.0             Creature — Lizard Assassin
4            Daggerfang Duo      1  3.0                Creature — Rat Squirrel
5         Downwind Ambusher      1  4.0              Creature — Skunk Assassin
6        Druid of the Spade      1  3.0                Creature — Rabbit Druid
7                    Forest      9  0.0                    Basic Land — Forest
8             Glidedive Duo      1  5.0                  Creature — Bat Lizard
9      Hazardroot Herbalist      1  3.0                Creature — Rabbit Druid
10         Hazel's Nocturne      1  4.0               