In [1]:
import mysql.connector
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity

## Downloand database

In [3]:
def connect_to_database():
    try:
        connection = mysql.connector.connect(
            host='localhost',
            database='magic_cards',
            user='root',
            password='ThisWasMyPassword'
        )
        if connection.is_connected():
            print("Connected to MySQL database")
            return connection
    except mysql.connector.Error as e:
        print(f"Error while connecting to MySQL: {e}")
    return None

def export_to_csv(connection, filename='magic_cards.csv'):
    query = """
    SELECT name, colorIdentity, keywords, supertypes
    FROM cards
    """
    df = pd.read_sql(query, connection)
    df.to_csv(filename, index=False)
    print(f"Data exported to {filename}")



In [4]:
# Main execution
connection = connect_to_database()
if connection:
    export_to_csv(connection)
    connection.close()
else:
    print("Failed to connect to the database.")

Connected to MySQL database


  df = pd.read_sql(query, connection)


Data exported to magic_cards.csv


## Analysis portion

In [5]:
def load_card_data(filename='magic_cards.csv'):
    return pd.read_csv(filename)

def prepare_features(df):
    # Process colors
    color_map = {'W': 'White', 'U': 'Blue', 'B': 'Black', 'R': 'Red', 'G': 'Green'}
    df['colorIdentity'] = df['colorIdentity'].fillna('').apply(lambda x: [color_map.get(c.strip(), c.strip()) for c in x.split(',')] if isinstance(x, str) else [])
    mlb_colors = MultiLabelBinarizer()
    color_features = pd.DataFrame(
        mlb_colors.fit_transform(df['colorIdentity']),
        columns=mlb_colors.classes_,
        index=df.index
    )
    
    # Process keywords
    df['keywords'] = df['keywords'].fillna('').apply(lambda x: [kw.strip() for kw in x.split(',')] if isinstance(x, str) else [])
    mlb_keywords = MultiLabelBinarizer()
    keyword_features = pd.DataFrame(
        mlb_keywords.fit_transform(df['keywords']),
        columns=mlb_keywords.classes_,
        index=df.index
    )
    
    # Process supertypes
    df['is_legendary'] = df['supertypes'].fillna('').str.contains('Legendary')
    
    # Combine features
    features = pd.concat([color_features, keyword_features, df[['is_legendary']]], axis=1)
    
    return features, mlb_colors, mlb_keywords

def cluster_cards(features, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, random_state=69, n_init=10)
    return kmeans.fit_predict(features)

def find_related_cards(df, features, legendary_card_name, top_n=10):
    legendary_card = df[df['name'] == legendary_card_name].index[0]
    legendary_features = features.iloc[legendary_card].values.reshape(1, -1)
    
    similarities = cosine_similarity(legendary_features, features)
    similar_indices = similarities[0].argsort()[::-1][1:top_n+1]
    
    return df.iloc[similar_indices]

In [6]:
df = load_card_data()
features, mlb_colors, mlb_keywords = prepare_features(df)

# Perform clustering
clusters = cluster_cards(features)
df['cluster'] = clusters

In [8]:
print("Sample of clustered cards:")
print(df[['name', 'colorIdentity', 'keywords', 'is_legendary', 'cluster']].head(5))

Sample of clustered cards:
                name colorIdentity        keywords  is_legendary  cluster
0  Ancestor's Chosen       [White]  [First strike]         False        7
1  Ancestor's Chosen       [White]  [First strike]         False        7
2     Angel of Mercy       [White]        [Flying]         False        7
3     Angel of Mercy       [White]        [Flying]         False        7
4   Angelic Blessing       [White]              []         False        9


In [9]:
# Find related cards for a legendary creature
legendary_card_name = "Reya Dawnbringer"  # An example from your data
if legendary_card_name in df['name'].values:
    related_cards = find_related_cards(df, features, legendary_card_name)
    print(f"\nCards related to {legendary_card_name}:")
    print(related_cards[['name', 'colorIdentity', 'keywords', 'is_legendary']])
else:
    print(f"Legendary creature '{legendary_card_name}' not found in the database.")


Cards related to Reya Dawnbringer:
                              name colorIdentity  keywords  is_legendary
49                Reya Dawnbringer       [White]  [Flying]          True
48                Reya Dawnbringer       [White]  [Flying]          True
73894  Mavinda, Students' Advocate       [White]  [Flying]          True
33564     Lulu, Helpful Hollyphant       [White]  [Flying]          True
64390              Celestial Kirin       [White]  [Flying]          True
54848   Teshar, Ancestor's Apostle       [White]  [Flying]          True
56006            Lieutenant Kirtar       [White]  [Flying]          True
84857              Celestial Kirin       [White]  [Flying]          True
56375       Linvala, the Preserver       [White]  [Flying]          True
85669  Mavinda, Students' Advocate       [White]  [Flying]          True


In [10]:
# Print some statistics
print("\nCluster Statistics:")
print(df['cluster'].value_counts())

print("\nMost common keywords:")
print(df['keywords'].explode().value_counts().head(10))

print("\nColor distribution:")
print(df['colorIdentity'].explode().value_counts())


Cluster Statistics:
cluster
0    17150
6    15621
2    13611
3    11453
4    10814
9     9237
7     7526
8     6368
5     3085
1     2828
Name: count, dtype: int64

Most common keywords:
keywords
             57286
Flying        8811
Enchant       3294
Trample       2441
Haste         1784
Vigilance     1675
Equip         1463
Mill          1427
Flash         1384
Scry          1345
Name: count, dtype: int64

Color distribution:
colorIdentity
Green    22038
Black    21896
White    21690
Blue     21666
Red      21650
         10814
Name: count, dtype: int64
