In [26]:
# Import all our libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import tensorflow as tf

print("All libraries imported successfully!")

# Force these columns to be parsed as strings when the CSV is read.
dtype_dict = dict.fromkeys(
    [
        'loyalty',
        'attraction_lights',
        'flavor_name',
        'printed_name',
        'printed_type_line',
        'printed_text',
    ],
    str,
)

# Load the cleaned card table that every later cell works from.
df_clean = pd.read_csv('data/mtg_cards_clean.csv', dtype=dtype_dict)
print(f"Loaded {df_clean.shape[0]} cards for ML model")

All libraries imported successfully!
Loaded 27623 cards for ML model


In [28]:
# Create TF-IDF vectors from card text
print("Creating TF-IDF vectors...")
tfidf = TfidfVectorizer(
    max_features=5000,      # Keep at most 5000 terms in the vocabulary
    stop_words='english',   # Remove common words like 'the', 'and'
    ngram_range=(1, 2),     # Use single words and word pairs
    min_df=5                # Term must appear in at least 5 cards
)

# Transform card text into numerical vectors.
# Missing text is replaced by an empty string so a NaN entry cannot abort the fit;
# the row count (and therefore row alignment with df_clean) is unchanged.
tfidf_matrix = tfidf.fit_transform(df_clean['combined_text'].fillna(''))
print(f"TF-IDF matrix shape: {tfidf_matrix.shape}")
print(f"Each card is now a vector of {tfidf_matrix.shape[1]} numbers!")

# Peek at the learned vocabulary. This prints the first 100 entries of the
# feature list as returned by the vectorizer, not a ranking by importance.
feature_names = tfidf.get_feature_names_out()
print(f"\nSample important words: {feature_names[:100]}")

Creating TF-IDF vectors...
TF-IDF matrix shape: (27623, 5000)
Each card is now a vector of 5000 numbers!

Sample important words: ['10' '10 10' '10 19' '10 damage' '10 life' '12' '13' '13 life' '19' '20'
 'abilities' 'abilities activate' 'abilities activated' 'abilities aren'
 'abilities artifacts' 'abilities creature' 'abilities creatures'
 'abilities enchantment' 'abilities opponents' 'abilities pays'
 'abilities targeted' 'abilities types' 'ability' 'ability artifact'
 'ability choose' 'ability control' 'ability costs' 'ability creature'
 'ability end' 'ability legendary' 'ability opponent' 'ability permanent'
 'ability resolved' 'ability sacrifice' 'ability triggers' 'able'
 'able attack' 'able attacks' 'able block' 'able creature'
 'able enchantment' 'able gain' 'able legendary' 'able sorcery' 'activate'
 'activate abilities' 'activate ability' 'activate control'
 'activate creature' 'activate exhaust' 'activate loyalty'
 'activate opponent' 'activate seven' 'activate sorcery' 'ac

In [30]:
# def find_similar_cards(card_name, top_n=10):
#     """
#     Find cards similar to the input card name
#     """
#     # Find the card in our dataset
#     card_matches = df_clean[df_clean['name'].str.contains(card_name, case=False, na=False)]
    
#     if len(card_matches) == 0:
#         return f"Card '{card_name}' not found in dataset"
    
#     # Get the first match
#     card_idx = card_matches.index[0]
#     card_vector = tfidf_matrix[card_idx]
    
#     # Calculate similarity with all other cards
#     similarities = cosine_similarity(card_vector, tfidf_matrix).flatten()
    
#     # Get top similar cards (excluding the card itself)
#     similar_indices = similarities.argsort()[-top_n-1:-1][::-1]
    
#     # Return results
#     results = []
#     for idx in similar_indices:
#         if idx != card_idx:  # Don't include the original card
#             results.append({
#                 'name': df_clean.iloc[idx]['name'],
#                 'similarity': similarities[idx],
#                 'type': df_clean.iloc[idx]['type_line'],
#                 'text': df_clean.iloc[idx]['oracle_text'][:100] + "..."
#             })
    
#     return results[:top_n]

# # Test the recommendation system!
# print("Testing with MacCready, Lamplight Mayor...")
# recommendations = find_similar_cards("MacCready, Lamplight Mayor")
# for i, card in enumerate(recommendations):
#     print(f"{i+1}. {card['name']} (similarity: {card['similarity']:.3f})")
#     print(f"   Type: {card['type']}")
#     print(f"   Text: {card['text']}")
#     print()

Testing with MacCready, Lamplight Mayor...
1. Gregor, Shrewd Magistrate (similarity: 0.504)
   Type: Legendary Creature — Human Advisor
   Text: Skulk (This creature can't be blocked by creatures with greater power.)
Whenever Gregor, Shrewd Magi...

2. Furtive Homunculus (similarity: 0.462)
   Type: Creature — Homunculus
   Text: Skulk (This creature can't be blocked by creatures with greater power.)...

3. Behind the Scenes (similarity: 0.450)
   Type: Enchantment
   Text: Creatures you control have skulk. (They can't be blocked by creatures with greater power.)
{4}{W}: C...

4. Pale Rider of Trostad (similarity: 0.449)
   Type: Creature — Spirit
   Text: Skulk (This creature can't be blocked by creatures with greater power.)
When this creature enters, d...

5. The Master, Mesmerist (similarity: 0.413)
   Type: Legendary Creature — Time Lord Rogue
   Text: {T}: Target creature an opponent controls with power less than or equal to The Master's power gains ...

6. Aysen Bureaucrats (sim

In [4]:
# # Let's see what the model thinks is "similar" about Atraxa
# atraxa_idx = df_clean[df_clean['name'].str.contains('Atraxa', case=False)].index[0]
# atraxa_text = df_clean.iloc[atraxa_idx]['combined_text']
# print("Atraxa's combined text:")
# print(atraxa_text)
# print("\n" + "="*50)

# # Let's see what Broken Wings text looks like
# broken_wings_idx = df_clean[df_clean['name'].str.contains('Broken Wings', case=False)].index[0]
# broken_wings_text = df_clean.iloc[broken_wings_idx]['combined_text']
# print("Broken Wings combined text:")
# print(broken_wings_text)

Atraxa's combined text:
Destroy target artifact, battle, enchantment, or creature with flying. Sorcery []

Broken Wings combined text:
Destroy target artifact, enchantment, or creature with flying. Instant []


In [31]:
# Look up MacCready, Lamplight Mayor by name (case-insensitive substring match)
print("Searching for MacCready, Lamplight Mayor in our dataset...")
maccready_matches = df_clean.loc[
    df_clean['name'].str.contains('MacCready, Lamplight Mayor', case=False, na=False)
]
print(f"Found {maccready_matches.shape[0]} matches:")
print(maccready_matches.loc[:, ['name', 'type_line']].head())

if maccready_matches.empty:
    # Nothing matched: show a sample of legendary cards instead
    print("MacCready, Lamplight Mayor not found! Let's see what commanders we do have...")
    commanders = df_clean.loc[
        df_clean['type_line'].str.contains('Legendary', case=False, na=False)
    ]
    print(f"Sample commanders in dataset:")
    print(commanders.loc[:, ['name', 'type_line']].head(10))
else:
    # Use the first matching row as the card of interest
    maccready_row = maccready_matches.iloc[0]
    print(f"\nActual MacCready, Lamplight Mayor data:")
    print(f"Name: {maccready_row['name']}")
    print(f"Type: {maccready_row['type_line']}")
    print(f"Oracle text: {maccready_row['oracle_text']}")
    print(f"Combined text: {maccready_row['combined_text']}")

Searching for MacCready, Lamplight Mayor in our dataset...
Found 1 matches:
                             name                           type_line
13984  MacCready, Lamplight Mayor  Legendary Creature — Human Advisor

Actual MacCready, Lamplight Mayor data:
Name: MacCready, Lamplight Mayor
Type: Legendary Creature — Human Advisor
Oracle text: Whenever a creature you control with power 2 or less attacks, it gains skulk until end of turn. (It can't be blocked by creatures with greater power.)
Whenever a creature with power 4 or greater attacks you, its controller loses 2 life and you gain 2 life.
Combined text: Whenever a creature you control with power 2 or less attacks, it gains skulk until end of turn. (It can't be blocked by creatures with greater power.)
Whenever a creature with power 4 or greater attacks you, its controller loses 2 life and you gain 2 life. Legendary Creature — Human Advisor []


In [32]:
# Let's search for MacCready, Lamplight Mayor in our dataset
print("Searching for MacCready, Lamplight Mayor in our dataset...")
maccready_matches = df_clean[df_clean['name'].str.contains('MacCready, Lamplight Mayor', case=False, na=False)]
# Report the size of the frame we just built (the previous code referenced
# `arcades_matches`, a name this cell never defines).
print(f"Found {len(maccready_matches)} matches:")

if len(maccready_matches) > 0:
    print(maccready_matches[['name', 'type_line']].head())
    
    # Get the actual MacCready, Lamplight Mayor data
    maccready_row = maccready_matches.iloc[0]
    print(f"\nActual MacCready, Lamplight Mayor data:")
    print(f"Name: {maccready_row['name']}")
    print(f"Type: {maccready_row['type_line']}")
    print(f"Oracle text: {maccready_row['oracle_text']}")
    print(f"Combined text: {maccready_row['combined_text'][:200]}...")
    
    # Test our recommendation function with MacCready, Lamplight Mayor.
    # NOTE(review): this requires `find_similar_cards` to have been defined by an
    # earlier cell; confirm that definition is active before a Restart & Run All.
    print("\n" + "="*50)
    print("Testing recommendations for MacCready, Lamplight Mayor...")
    recommendations = find_similar_cards("MacCready, Lamplight Mayor")
    for i, card in enumerate(recommendations):
        print(f"{i+1}. {card['name']} (similarity: {card['similarity']:.3f})")
        print(f"   Type: {card['type']}")
        print(f"   Text: {card['text']}")
        print()
        
else:
    print("MacCready, Lamplight Mayor not found either! Let's see what cards we have:")
    # Show some sample card names
    sample_cards = df_clean['name'].head(20)
    print("Sample cards in dataset:")
    for card in sample_cards:
        print(f"- {card}")

Searching for MacCready, Lamplight Mayor in our dataset...
Found 1 matches:
                             name                           type_line
13984  MacCready, Lamplight Mayor  Legendary Creature — Human Advisor

Actual MacCready, Lamplight Mayor data:
Name: MacCready, Lamplight Mayor
Type: Legendary Creature — Human Advisor
Oracle text: Whenever a creature you control with power 2 or less attacks, it gains skulk until end of turn. (It can't be blocked by creatures with greater power.)
Whenever a creature with power 4 or greater attacks you, its controller loses 2 life and you gain 2 life.
Combined text: Whenever a creature you control with power 2 or less attacks, it gains skulk until end of turn. (It can't be blocked by creatures with greater power.)
Whenever a creature with power 4 or greater attac...

Testing recommendations for MacCready, Lamplight Mayor...
1. Gregor, Shrewd Magistrate (similarity: 0.504)
   Type: Legendary Creature — Human Advisor
   Text: Skulk (This creatur

In [33]:
# Let's see what words the TF-IDF model thinks are most important
def analyze_tfidf_features(card_name='MacCready, Lamplight Mayor', top_n=20):
    """Print the highest-weighted TF-IDF terms of one card.

    Reads the notebook-level ``df_clean``, ``tfidf`` and ``tfidf_matrix``.

    Parameters
    ----------
    card_name : str
        Card to inspect. An exact (case-insensitive) name match is preferred;
        otherwise the first card whose name contains ``card_name`` is used.
        The name is matched literally, not as a regular expression.
    top_n : int
        Maximum number of terms to print. Terms with zero weight are skipped.

    Returns
    -------
    None
        Results are printed. If no card matches, a message is printed instead.
    """
    feature_names = tfidf.get_feature_names_out()

    # Resolve the card to a row *position*, because tfidf_matrix is indexed by
    # position and not by DataFrame index label.
    names = df_clean['name']
    mask = names.str.lower().eq(card_name.lower()).fillna(False).to_numpy(dtype=bool)
    if not mask.any():
        mask = names.str.contains(card_name, case=False, na=False, regex=False).to_numpy(dtype=bool)
    hits = np.flatnonzero(mask)
    if hits.size == 0:
        print(f"Card '{card_name}' not found in dataset")
        return

    card_vector = tfidf_matrix[int(hits[0])].toarray()[0]

    # Highest weights first, limited to top_n entries
    top_indices = card_vector.argsort()[::-1][:max(top_n, 0)]
    print(f"Top {top_n} TF-IDF features for {card_name}:")
    for idx in top_indices:
        if card_vector[idx] > 0:
            print(f"{feature_names[idx]}: {card_vector[idx]:.3f}")

# Run the analysis; the function prints its findings itself.
analyze_tfidf_features()

Top 20 TF-IDF features for MacCready, Lamplight Mayor:
power: 0.323
greater: 0.285
attacks gains: 0.237
skulk: 0.234
creatures greater: 0.231
attacks: 0.217
controller loses: 0.204
human advisor: 0.202
greater power: 0.201
control power: 0.198
life legendary: 0.192
life: 0.186
turn blocked: 0.184
advisor: 0.183
power creature: 0.171
life gain: 0.162
power greater: 0.161
blocked creatures: 0.149
creature power: 0.147
loses life: 0.135


In [36]:
# Let's try a completely different approach focused on MTG mechanics
def create_keyword_features(df, max_count=3):
    """Create count features for a fixed list of MTG keywords and mechanics.

    Each card's ``combined_text`` is lower-cased and every keyword is counted as
    a plain substring (so e.g. 'life' is also counted inside 'lifelink').

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``combined_text`` column. Missing text is treated as an
        empty string, giving an all-zero feature row.
    max_count : int
        Upper bound applied to every count so that a keyword repeated many
        times does not dominate the vector.

    Returns
    -------
    (numpy.ndarray, list of str)
        An integer matrix with one row per card and one column per keyword
        (shape ``(len(df), n_keywords)``, also when ``df`` is empty), and the
        keyword list giving the column order.
    """
    # Important MTG mechanics for synergy
    important_keywords = [
        'defender', 'flying', 'vigilance', 'deathtouch', 'lifelink', 'trample',
        'haste', 'first strike', 'double strike', 'hexproof', 'indestructible',
        'proliferate', 'counter', 'artifact', 'enchantment', 'token', 'draw',
        'graveyard', 'exile', 'sacrifice', 'destroy', 'search', 'toughness',
        'power', 'enters', 'whenever', 'combat damage', 'attacks', 'skulk', 'lose', 'gain', 'life'
    ]

    # Normalise once per card instead of once per (card, keyword) pair
    texts = df['combined_text'].fillna('').astype(str).str.lower()

    rows = [
        [min(text.count(keyword), max_count) for keyword in important_keywords]
        for text in texts
    ]

    # reshape keeps the matrix two-dimensional even when there are no rows
    keyword_matrix = np.array(rows, dtype=int).reshape(len(rows), len(important_keywords))
    return keyword_matrix, important_keywords

# Create keyword-based features
keyword_matrix, keywords = create_keyword_features(df_clean)
print(f"Keyword matrix shape: {keyword_matrix.shape}")
print(f"Keywords: {keywords}")

# Test with MacCready, Lamplight Mayor.
# The name is matched literally and missing names are ignored; the result is
# converted to a row position because keyword_matrix is indexed by position.
maccready_mask = df_clean['name'].str.contains(
    'MacCready, Lamplight Mayor', case=False, na=False, regex=False
)
maccready_idx = int(np.flatnonzero(maccready_mask.to_numpy())[0])
maccready_features = keyword_matrix[maccready_idx]
print(f"\nMacCready, Lamplight Mayor keyword features:")
for i, keyword in enumerate(keywords):
    if maccready_features[i] > 0:
        print(f"{keyword}: {maccready_features[i]}")

Keyword matrix shape: (27623, 32)
Keywords: ['defender', 'flying', 'vigilance', 'deathtouch', 'lifelink', 'trample', 'haste', 'first strike', 'double strike', 'hexproof', 'indestructible', 'proliferate', 'counter', 'artifact', 'enchantment', 'token', 'draw', 'graveyard', 'exile', 'sacrifice', 'destroy', 'search', 'toughness', 'power', 'enters', 'whenever', 'combat damage', 'attacks', 'skulk', 'lose', 'gain', 'life']

MacCready, Lamplight Mayor keyword features:
power: 3
whenever: 2
attacks: 2
skulk: 1
lose: 1
gain: 2
life: 2


In [37]:
def find_similar_cards_keywords(card_name, top_n=10):
    """Find cards similar to the input card using keyword-based features.

    Reads the notebook-level ``df_clean``, ``keyword_matrix`` and ``keywords``.

    Parameters
    ----------
    card_name : str
        Card to look up. An exact (case-insensitive) name match is preferred;
        otherwise the first card whose name contains ``card_name`` is used.
        The name is matched literally, not as a regular expression.
    top_n : int
        Maximum number of similar cards to return.

    Returns
    -------
    list of dict, or str
        One dict per similar card (keys ``name``, ``similarity``, ``type``,
        ``text``, ``keywords``), most similar first. ``keywords`` lists the
        keywords present on that card. If no card matches ``card_name`` a
        "not found" message string is returned instead.
    """
    # Resolve the card to a row *position*: keyword_matrix and the argsort
    # results below are positional, not DataFrame index labels.
    names = df_clean['name']
    mask = names.str.lower().eq(card_name.lower()).fillna(False).to_numpy(dtype=bool)
    if not mask.any():
        mask = names.str.contains(card_name, case=False, na=False, regex=False).to_numpy(dtype=bool)
    hits = np.flatnonzero(mask)

    if hits.size == 0:
        return f"Card '{card_name}' not found in dataset"

    card_idx = int(hits[0])
    card_vector = keyword_matrix[card_idx].reshape(1, -1)

    # Calculate similarity with all cards using keyword features
    similarities = cosine_similarity(card_vector, keyword_matrix).flatten()

    # Rank everything, then drop the query card explicitly. Other cards can tie
    # with it at similarity 1.0, so it is not guaranteed to sort last.
    ranked = similarities.argsort()[::-1]
    ranked = ranked[ranked != card_idx][:max(top_n, 0)]

    results = []
    for idx in ranked:
        row = df_clean.iloc[idx]
        # Cards without rules text have a missing oracle_text value
        oracle_text = row['oracle_text'] if isinstance(row['oracle_text'], str) else ''
        results.append({
            'name': row['name'],
            'similarity': similarities[idx],
            'type': row['type_line'],
            'text': oracle_text[:150] + "...",
            'keywords': [kw for kw, count in zip(keywords, keyword_matrix[idx]) if count > 0]
        })

    return results

# Test the improved recommendation system!
print("Testing improved recommendations for MacCready, Lamplight Mayor...")
recommendations = find_similar_cards_keywords("MacCready, Lamplight Mayor")

if isinstance(recommendations, str):
    # The lookup failed and returned a message instead of a list of cards
    print(recommendations)
else:
    for i, card in enumerate(recommendations):
        print(f"{i+1}. {card['name']} (similarity: {card['similarity']:.3f})")
        print(f"   Type: {card['type']}")
        # These are the keywords found on the recommended card itself, which
        # may include keywords the query card does not have.
        print(f"   Keywords: {card['keywords']}")
        print(f"   Text: {card['text']}")
        print()

Testing improved recommendations for MacCready, Lamplight Mayor...
1. Quilled Charger (similarity: 0.866)
   Type: Creature — Porcupine Mount
   Shared keywords: ['power', 'whenever', 'attacks', 'gain']
   Text: Whenever this creature attacks while saddled, it gets +1/+2 and gains menace until end of turn. (It can't be blocked except by two or more creatures.)...

2. Mycoid Shepherd (similarity: 0.866)
   Type: Creature — Fungus
   Shared keywords: ['power', 'whenever', 'gain', 'life']
   Text: Whenever this creature or another creature you control with power 5 or greater dies, you may gain 5 life....

3. Courageous Goblin (similarity: 0.866)
   Type: Creature — Goblin
   Shared keywords: ['power', 'whenever', 'attacks', 'gain']
   Text: Whenever this creature attacks while you control a creature with power 4 or greater, this creature gets +1/+0 and gains menace until end of turn. (It ...

4. Raubahn, Bull of Ala Mhigo (similarity: 0.866)
   Type: Legendary Creature — Human Warrior
   

In [38]:
# # First, let's check Arcades' color identity
# arcades_row = df_clean[df_clean['name'].str.contains('Arcades, the Strategist', case=False)].iloc[0]
# print(f"Arcades colors: {arcades_row['colors']}")

# def get_color_identity(colors_list):
#     """Convert color list to set for easier comparison"""
#     if pd.isna(colors_list):
#         return set()
#     # Handle string representation of list
#     if isinstance(colors_list, str):
#         import ast
#         try:
#             colors_list = ast.literal_eval(colors_list)
#         except:
#             return set()
#     return set(colors_list) if colors_list else set()

# def is_legal_in_deck(card_colors, commander_colors):
#     """Check if card is legal in commander's color identity"""
#     card_identity = get_color_identity(card_colors)
#     commander_identity = get_color_identity(commander_colors)
    
#     # Card is legal if all its colors are in commander's identity
#     return card_identity.issubset(commander_identity)

# # Test this function
# arcades_colors = arcades_row['colors']
# test_cards = [
#     (['W', 'U'], "Should be legal"),
#     (['W', 'U', 'G'], "Should be legal"), 
#     (['R'], "Should be illegal"),
#     (['B', 'G'], "Should be illegal"),
#     ([], "Colorless - should be legal")
# ]

# print(f"Arcades color identity: {arcades_colors}")
# for colors, description in test_cards:
#     legal = is_legal_in_deck(colors, arcades_colors)
#     print(f"{colors} - {description}: {'✅ Legal' if legal else '❌ Illegal'}")

Arcades colors: ['G', 'U', 'W']
Arcades color identity: ['G', 'U', 'W']


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [39]:
# First, let's examine how colors are stored in our data.
# The name is matched literally and rows with a missing name are ignored,
# in line with the other name lookups in this notebook.
arcades_row = df_clean[
    df_clean['name'].str.contains('Arcades, the Strategist', case=False, na=False, regex=False)
].iloc[0]
print(f"Arcades colors (raw): {repr(arcades_row['colors'])}")
print(f"Type of colors field: {type(arcades_row['colors'])}")

# Let's look at a few more examples (at most five, fewer if the table is smaller)
print("\nSample color data from other cards:")
for i in range(min(5, len(df_clean))):
    card = df_clean.iloc[i]
    print(f"{card['name']}: {repr(card['colors'])} (type: {type(card['colors'])})")

Arcades colors (raw): "['G', 'U', 'W']"
Type of colors field: <class 'str'>

Sample color data from other cards:
+2 Mace: "['W']" (type: <class 'str'>)
Aarakocra Sneak: "['U']" (type: <class 'str'>)
Aatchik, Emerald Radian: "['B', 'G']" (type: <class 'str'>)
Abaddon the Despoiler: "['B', 'R', 'U']" (type: <class 'str'>)
Abandoned Campground: '[]' (type: <class 'str'>)


In [40]:
import ast

def get_color_identity(colors_string):
    """Convert a stored colour value to a set for easier comparison.

    Parameters
    ----------
    colors_string : str, list-like, or missing value
        Normally the string form of a list as stored in the CSV, e.g.
        ``"['G', 'U', 'W']"`` or ``"[]"``. An already-parsed list, tuple, set
        or array of colours is accepted as well.

    Returns
    -------
    set
        The colours as a set. Missing values (``None``, NaN), unparseable
        strings and non-iterable values all yield an empty set.
    """
    if isinstance(colors_string, (str, bytes)):
        try:
            # Convert string representation of list to actual list
            parsed = ast.literal_eval(colors_string.strip())
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a Python literal: treat as "no colours"
            return set()
    else:
        parsed = colors_string

    try:
        return set(parsed)
    except TypeError:
        # None, NaN and other non-iterable (or unhashable-element) values
        return set()

def is_legal_in_deck(card_colors_string, commander_colors_string):
    """Return True when every colour of the card is also a colour of the commander.

    Both arguments are passed through ``get_color_identity`` first, so they may
    be given in the stored string form (e.g. ``"['W', 'U']"``).
    """
    card_set = get_color_identity(card_colors_string)
    allowed = get_color_identity(commander_colors_string)

    # Subset test: a colourless card (empty set) is always legal
    return card_set <= allowed

# Try the legality check against Arcades' colours with a few hand-picked cases
arcades_colors = arcades_row['colors']
test_cases = [
    ("['W', 'U']", "White/Blue - Should be legal"),
    ("['W', 'U', 'G']", "Bant - Should be legal"),
    ("['R']", "Red - Should be illegal"),
    ("['B', 'G']", "Black/Green - Should be illegal"),
    ("[]", "Colorless - Should be legal"),
    ("['W']", "Mono-white - Should be legal"),
]

print(f"Arcades color identity: {get_color_identity(arcades_colors)}")
print("\nTesting color legality:")
for colors, description in test_cases:
    legal = is_legal_in_deck(colors, arcades_colors)
    if legal:
        print(f"{description}: ✅ Legal")
    else:
        print(f"{description}: ❌ Illegal")

Arcades color identity: {'U', 'G', 'W'}

Testing color legality:
White/Blue - Should be legal: ✅ Legal
Bant - Should be legal: ✅ Legal
Red - Should be illegal: ❌ Illegal
Black/Green - Should be illegal: ❌ Illegal
Colorless - Should be legal: ✅ Legal
Mono-white - Should be legal: ✅ Legal


In [42]:
def find_similar_cards_with_colors(card_name, top_n=10):
    """Find cards similar to the input card using keyword-based features
    AND filter by color identity legality.

    Reads the notebook-level ``df_clean``, ``keyword_matrix`` and ``keywords``
    and prints a short header describing the commander that was found.

    Parameters
    ----------
    card_name : str
        Commander to look up. An exact (case-insensitive) name match is
        preferred; otherwise the first card whose name contains ``card_name``
        is used. The name is matched literally, not as a regular expression.
    top_n : int
        Maximum number of recommendations to return.

    Returns
    -------
    list of dict, or str
        One dict per recommended card (keys ``name``, ``similarity``, ``type``,
        ``colors``, ``text``, ``keywords``), most similar first. ``keywords``
        lists the keywords present on that card. If no card matches
        ``card_name`` a "not found" message string is returned instead.
    """
    # Resolve the commander to a row *position*: keyword_matrix and the argsort
    # results below are positional, not DataFrame index labels.
    names = df_clean['name']
    mask = names.str.lower().eq(card_name.lower()).fillna(False).to_numpy(dtype=bool)
    if not mask.any():
        mask = names.str.contains(card_name, case=False, na=False, regex=False).to_numpy(dtype=bool)
    hits = np.flatnonzero(mask)

    if hits.size == 0:
        return f"Card '{card_name}' not found in dataset"

    # Get the commander's info
    commander_idx = int(hits[0])
    commander_row = df_clean.iloc[commander_idx]
    # NOTE(review): legality is judged on the 'colors' column. Commander decks
    # are normally restricted by colour identity, which can differ from a
    # card's colours; confirm whether the dataset has a dedicated column.
    commander_colors = commander_row['colors']
    commander_vector = keyword_matrix[commander_idx].reshape(1, -1)

    print(f"Commander: {commander_row['name']}")
    print(f"Color Identity: {get_color_identity(commander_colors)}")
    print("-" * 50)

    # Calculate similarity with all cards
    similarities = cosine_similarity(commander_vector, keyword_matrix).flatten()

    # Walk ALL cards from most to least similar, filtering as we go
    all_indices = similarities.argsort()[::-1]

    results = []
    for idx in all_indices:
        # Stop as soon as we have enough recommendations (also covers top_n <= 0)
        if len(results) >= top_n:
            break

        if idx == commander_idx:  # Skip the commander itself
            continue

        card_row = df_clean.iloc[idx]
        card_colors = card_row['colors']

        # Keep only cards that are legal in the commander's colours
        if not is_legal_in_deck(card_colors, commander_colors):
            continue

        # Cards without rules text have a missing oracle_text value
        oracle_text = card_row['oracle_text'] if isinstance(card_row['oracle_text'], str) else ''
        results.append({
            'name': card_row['name'],
            'similarity': similarities[idx],
            'type': card_row['type_line'],
            'colors': get_color_identity(card_colors),
            'text': oracle_text[:500] + "...",
            'keywords': [kw for kw, count in zip(keywords, keyword_matrix[idx]) if count > 0]
        })

    return results

# Test the color-filtered recommendations!
print("Testing color-filtered recommendations for MacCready, Lamplight Mayor...")
recommendations = find_similar_cards_with_colors("MacCready, Lamplight Mayor", 25)

if isinstance(recommendations, str):
    # The lookup failed and returned a message instead of a list of cards
    print(recommendations)
else:
    for i, card in enumerate(recommendations):
        print(f"{i+1}. {card['name']} (similarity: {card['similarity']:.3f})")
        print(f"   Colors: {card['colors']}")
        print(f"   Type: {card['type']}")
        # These are the keywords found on the recommended card itself, which
        # may include keywords the commander does not have.
        print(f"   Keywords: {card['keywords']}")
        print(f"   Text: {card['text']}")
        print()

Testing color-filtered recommendations for MacCready, Lamplight Mayor...
Commander: MacCready, Lamplight Mayor
Color Identity: {'W', 'B'}
--------------------------------------------------
1. Unhinged Beast Hunt (similarity: 0.864)
   Colors: set()
   Type: Stickers
   Shared keywords: ['toughness', 'power', 'whenever', 'attacks', 'gain', 'life']
   Text: {TK}{TK} — {T}: You gain 1 life.
{TK}{TK}{TK}{TK} — Whenever this creature attacks, tap each creature an opponent controls with the same power and/or same toughness as this creature.
{TK}{TK} — 4/1
{TK}{TK}{TK} — 2/6...

2. Bounding Felidar (similarity: 0.864)
   Colors: {'W'}
   Type: Creature — Cat Beast Mount
   Shared keywords: ['counter', 'power', 'whenever', 'attacks', 'gain', 'life']
   Text: Whenever this creature attacks while saddled, put a +1/+1 counter on each other creature you control. You gain 1 life for each of those creatures.
Saddle 2 (Tap any number of other creatures you control with total power 2 or more: This Mou

In [43]:
# Smoke-test the recommender on commanders with different strategies
test_commanders = [
    "MacCready, Lamplight Mayor",
    "Gisa, the Hellraiser",
    "Go-Shintai of Life's Origin",
]

for commander in test_commanders:
    print(f"🔍 Testing recommendations for '{commander}'...")
    try:
        recs = find_similar_cards_with_colors(commander, 5)
        if not isinstance(recs, str):
            print(f"   Found {len(recs)} legal recommendations")
            if recs:
                print(f"   Top recommendation: {recs[0]['name']} (similarity: {recs[0]['similarity']:.3f})")
        else:
            # A string result is the "not found" message
            print(f"   {recs}")
    except Exception as e:
        print(f"   Error: {e}")
    print()

🔍 Testing recommendations for 'MacCready, Lamplight Mayor'...
Commander: MacCready, Lamplight Mayor
Color Identity: {'W', 'B'}
--------------------------------------------------
   Found 5 legal recommendations
   Top recommendation: Unhinged Beast Hunt (similarity: 0.864)

🔍 Testing recommendations for 'Gisa, the Hellraiser'...
Commander: Gisa, the Hellraiser
Color Identity: {'B'}
--------------------------------------------------
   Found 5 legal recommendations
   Top recommendation: Tormod, the Desecrator (similarity: 0.866)

🔍 Testing recommendations for 'Go-Shintai of Life's Origin'...
Commander: Go-Shintai of Life's Origin
Color Identity: {'G'}
--------------------------------------------------
   Found 5 legal recommendations
   Top recommendation: Squirrel Sanctuary (similarity: 0.939)



In [44]:
def evaluate_recommendations(commander_name, expected_themes=None):
    """Evaluate the quality of recommendations for a given commander.

    Requests 10 recommendations from ``find_similar_cards_with_colors`` and
    prints summary statistics about them.

    Parameters
    ----------
    commander_name : str
        Name passed on to ``find_similar_cards_with_colors``.
    expected_themes : list of str, optional
        Themes the deck should care about. A recommendation counts as a theme
        match when any theme occurs (case-insensitively) inside one of its
        keywords.

    Returns
    -------
    list of dict, or str
        The recommendations that were evaluated, or ``"Error: ..."`` when the
        commander lookup returned a message string.
    """
    recs = find_similar_cards_with_colors(commander_name, 10)

    if isinstance(recs, str):
        return f"Error: {recs}"

    print(f"📊 Evaluation for {commander_name}:")
    print(f"Number of recommendations: {len(recs)}")

    if not recs:
        return recs

    avg_similarity = sum(r['similarity'] for r in recs) / len(recs)
    print(f"Average similarity: {avg_similarity:.3f}")

    # Check diversity of card types. Supertypes such as "Legendary" come first
    # on a type line but are not card types, so they are skipped.
    supertypes = {'basic', 'legendary', 'ongoing', 'snow', 'world'}
    card_types = set()
    for rec in recs:
        type_line = rec['type'] if isinstance(rec['type'], str) else ''
        words = type_line.split(' — ')[0].split()
        main_types = [w for w in words if w.lower() not in supertypes] or words
        if main_types:  # missing/empty type lines contribute nothing
            card_types.add(main_types[0])
    print(f"Card type diversity: {len(card_types)} different types")
    # Sorted so the printed order is the same on every run
    print(f"Types found: {', '.join(sorted(card_types))}")

    if expected_themes:
        wanted = [theme.lower() for theme in expected_themes]
        # Match against each keyword separately so a theme cannot match text
        # that only arises from two neighbouring keywords being joined.
        theme_matches = sum(
            1
            for rec in recs
            if any(theme in kw.lower() for kw in rec['keywords'] for theme in wanted)
        )
        print(f"Theme relevance: {theme_matches}/{len(recs)} cards match expected themes")

    return recs

# Test with Arcades and expected themes.
# The call is the cell's last expression, so the returned recommendation list
# is also displayed as the cell output, below the printed summary.
arcades_themes = ['defender', 'toughness', 'draw']
evaluate_recommendations("Arcades, the Strategist", arcades_themes)

Commander: Arcades, the Strategist
Color Identity: {'U', 'G', 'W'}
--------------------------------------------------
📊 Evaluation for Arcades, the Strategist:
Number of recommendations: 10
Average similarity: 0.777
Card type diversity: 1 different types
Types found: Creature
Theme relevance: 10/10 cards match expected themes


[{'name': 'Corrupted Shapeshifter',
  'similarity': 0.8340576562282991,
  'type': 'Creature — Eldrazi Shapeshifter',
  'colors': set(),
  'text': 'Devoid (This card has no color.)\nAs this creature enters, it becomes your choice of a 3/3 creature with flying, a 2/5 creature with vigilance, or a 0/12 creature with defender....',
  'keywords': ['defender', 'flying', 'vigilance', 'enters']},
 {'name': 'Orator of Ojutai',
  'similarity': 0.7912565680749445,
  'type': 'Creature — Bird Monk',
  'colors': {'W'},
  'text': 'As an additional cost to cast this spell, you may reveal a Dragon card from your hand.\nDefender, flying\nWhen this creature enters, if you revealed a Dragon card or controlled a Dragon as you cast this spell, draw a card....',
  'keywords': ['defender', 'flying', 'draw', 'enters']},
 {'name': 'Flumph',
  'similarity': 0.7912565680749444,
  'type': 'Creature — Jellyfish',
  'colors': {'W'},
  'text': 'Defender, flying\nWhenever this creature is dealt damage, you and target 