# 📘 0_parsing_mechanics.ipynb

Extracts mechanics from each card's `oracle_text` by matching it against the regex patterns defined in `mechanics_full.json`.

In [None]:
import pandas as pd
import json
import re
from pathlib import Path


In [None]:
# === Read the raw Scryfall export, keeping only cards that have oracle text ===
raw_path = Path("../data/raw/scryfall_cards.csv")

# Fail fast with a readable message if the input file is absent.
if not raw_path.exists():
    raise FileNotFoundError(f"❌ Could not find raw Scryfall data at {raw_path}")

# Rows whose `oracle_text` is missing are dropped before any parsing happens.
df = pd.read_csv(raw_path).dropna(subset=["oracle_text"])
print(f"✅ Loaded {len(df)} cards with oracle text.")


In [None]:
# === Load structured mechanics definitions ===
# The loaded value is later iterated entry by entry; each entry is read with
# `.get("regex")` and `["mechanic"]` by `extract_mechanics`.
mechanics_path = Path("../data/static/mechanics_full.json")

if not mechanics_path.exists():
    raise FileNotFoundError(f"❌ Could not find mechanics JSON at {mechanics_path}")

# Pin the encoding: without it `open` falls back to the platform's locale
# default, which can fail or garble non-ASCII text in a UTF-8 JSON file.
with open(mechanics_path, encoding="utf-8") as f:
    mechanics = json.load(f)

print(f"✅ Loaded {len(mechanics)} mechanics definitions.")


In [None]:
# === Mechanic extractor ===
def extract_mechanics(text, mechanics_list):
    """Return the mechanics whose regex matches *text*.

    Args:
        text: The card text to search. Anything that is not a ``str``
            (for example ``None`` or a NaN cell) is treated as having no
            text and yields two empty lists.
        mechanics_list: Iterable of dict-like definitions. Each entry's
            ``"regex"`` value is the pattern to search for; entries with a
            missing or empty pattern are skipped. A matching entry must
            provide a ``"mechanic"`` key.

    Returns:
        A ``(found, verbose)`` tuple: ``found`` is the list of matching
        ``"mechanic"`` values, ``verbose`` is the list of the matching
        definition objects themselves, both in ``mechanics_list`` order.

    Raises:
        KeyError: If a matching definition has no ``"mechanic"`` key.
    """
    found = []
    verbose = []

    # `re.search` raises TypeError on non-string input; there is nothing to
    # match in that case, so report "no mechanics" instead of crashing.
    if not isinstance(text, str):
        return found, verbose

    for m in mechanics_list:
        pattern = m.get("regex", "")
        if not pattern:
            continue

        # Keep the try body limited to the one call that can raise re.error.
        try:
            matched = re.search(pattern, text, re.IGNORECASE)
        except re.error as e:
            # A malformed pattern is reported and skipped so that the other
            # definitions are still evaluated.
            print(f"⚠️ Regex error for {m.get('mechanic')}: {e}")
            continue

        if matched:
            found.append(m["mechanic"])
            verbose.append(m)

    return found, verbose


In [None]:
# === Apply mechanic extraction to each card ===
# Each call returns a (names, definitions) pair for one card's oracle text.
results = [extract_mechanics(text, mechanics) for text in df["oracle_text"]]

# Build the two columns explicitly instead of unpacking `zip(*results)`:
# with zero rows that zip yields nothing and the two-target unpack raises
# ValueError, whereas empty lists assign cleanly to an empty DataFrame.
df["parsed_mechanics"] = [names for names, _ in results]
df["parsed_mechanics_verbose"] = [definitions for _, definitions in results]

print("✅ Parsing complete. Example:")
print(df[["name", "parsed_mechanics"]].head())


In [None]:
# === Write the annotated cards to the processed-data folder ===
output_path = Path("../data/processed/parsed_cards.csv")

# Create the destination directory (and any missing parents) if needed.
output_dir = output_path.parent
output_dir.mkdir(parents=True, exist_ok=True)

# index=False leaves the DataFrame index out of the file.
# NOTE(review): the list-valued columns are written as their string form —
# confirm downstream readers parse them accordingly.
df.to_csv(output_path, index=False)
print(f"✅ Saved parsed data to {output_path}")
