# 📊 1_feature_engineering.ipynb

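Enriches parsed card data with additional metadata columns: colors, mana cost, cmc, type line, rarity, set, release date, power, and toughness. Columns that are missing from the parsed input are currently filled with fixed placeholder values rather than real card metadata.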

In [None]:
import ast
import json
from pathlib import Path

import pandas as pd


In [None]:
# === Load parsed card data ===
parsed_path = Path("../data/processed/parsed_cards.csv")

# Read the CSV only when it is present; otherwise stop with a message that
# names the path that was looked up.
if parsed_path.exists():
    df = pd.read_csv(parsed_path)
else:
    raise FileNotFoundError(f"❌ Could not find parsed card data at {parsed_path}")

# If mechanics were stringified lists, safely convert them
def try_parse(val):
    """Decode a stringified list cell, returning ``[]`` when it cannot be decoded.

    The value is first decoded as JSON. If that fails, it is decoded as a
    Python literal, which covers the ``str()`` form of a list such as
    ``"['flying', 'trample']"``; ``json.loads`` rejects that form because JSON
    strings must be double-quoted.

    Args:
        val: Raw cell value. Anything that is not ``str``, ``bytes`` or
            ``bytearray`` (for example the float NaN of an empty cell) is
            treated as undecodable.

    Returns:
        Whatever ``json.loads`` produces when ``val`` is valid JSON; otherwise
        the list produced by ``ast.literal_eval`` when ``val`` is a Python
        list literal; otherwise an empty list.
    """
    if not isinstance(val, (str, bytes, bytearray)):
        return []

    try:
        return json.loads(val)
    except (ValueError, RecursionError):
        # Not JSON (JSONDecodeError is a ValueError); try the Python-literal form.
        pass

    if not isinstance(val, str):
        # literal_eval only parses text; bytes that were not JSON are undecodable.
        return []

    try:
        literal = ast.literal_eval(val)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return []

    # The fallback exists to recover lists only; any other literal keeps the
    # previous "undecodable" result.
    return literal if isinstance(literal, list) else []

# Run try_parse over each mechanics column so the cells hold decoded objects
# instead of the strings read from the CSV.
for mechanics_column in ("parsed_mechanics", "parsed_mechanics_verbose"):
    df[mechanics_column] = df[mechanics_column].apply(try_parse)

print(f"✅ Loaded {len(df)} parsed cards.")


In [None]:
# === Add placeholder enrichment fields ===
# Replace this with actual Scryfall metadata integration if available
# Each entry maps a column name to the value used for every row when the
# column is not already present in df; existing columns are kept as they are.
placeholder_defaults = {
    "colors": ["U"],
    "mana_cost": "{2}{U}",
    "cmc": 3,
    "type_line": "Creature — Merfolk Wizard",
    "rarity": "uncommon",
    "set": "znr",
    "released_at": "2020-09-25",
    "power": "2",
    "toughness": "1",
}

row_count = len(df)
for field_name, placeholder in placeholder_defaults.items():
    df[field_name] = df.get(field_name, [placeholder] * row_count)


In [None]:
# === Reorder columns and export ===
columns_order = [
    "name",
    "oracle_text",
    "parsed_mechanics",
    "parsed_mechanics_verbose",
    "colors",
    "mana_cost",
    "cmc",
    "type_line",
    "rarity",
    "set",
    "released_at",
    "power",
    "toughness",
]
# Keep the listed columns that exist in df, in the listed order; any column
# not named in columns_order is left out of the result.
present_columns = [name for name in columns_order if name in df.columns]
df = df[present_columns]

output_path = Path("../data/processed/enriched_cards.csv")
# Create the destination directory if needed before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
print(f"✅ Saved enriched card data to {output_path}")
