In [4]:
# Cell 1: Imports and Setup
import json
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from pyedhrec import EDHRec
import time
import os

# Base directory for relative data paths. The original expression took the
# dirname of the absolute path of the *string* '__file__', which resolves to
# the kernel's current working directory, so that is what is used here.
notebook_dir = os.getcwd()
scripts_dir = os.path.join(notebook_dir, '..', 'scripts')
csv_path = os.path.join(scripts_dir, 'edhrec_commanders_complete.csv')
default_cards_path = os.path.join(scripts_dir, 'viz_preparation', 'default-cards-20241223222017.json')

# EDHREC client and the commander used throughout the exploratory cells
commander = "The Ur-Dragon"
edhrec = EDHRec()

# Function to safely get data with error handling and delay
def get_card_data(func, card_name, delay=1):
    """Call ``func(card_name)`` after a short pause, returning ``None`` on failure.

    Args:
        func: Callable that accepts the card name as its only argument.
        card_name: Value passed straight through to ``func``.
        delay: Seconds to sleep before the call. Defaults to 1, the pause the
            function always used; pass 0 (or None) to skip it.

    Returns:
        Whatever ``func`` returns, or ``None`` if ``func`` raised an
        ``Exception`` (the error is printed rather than re-raised).
    """
    # Not every callable has __name__ (functools.partial objects, instances
    # with __call__); fall back to repr so the error report cannot itself raise.
    func_label = getattr(func, '__name__', None) or repr(func)
    try:
        if delay and delay > 0:
            time.sleep(delay)  # Small delay between requests
        return func(card_name)
    except Exception as e:
        print(f"Error getting {func_label}: {e}")
        return None

# Fetch every card list EDHREC offers for the commander (type lists plus
# high-synergy and new cards). Each pair is (display label, EDHRec method name);
# the method is looked up and called one entry at a time, in this order.
category_methods = [
    ("High Synergy Cards", "get_high_synergy_cards"),
    ("New Cards", "get_new_cards"),
    ("Creatures", "get_top_creatures"),
    ("Instants", "get_top_instants"),
    ("Sorceries", "get_top_sorceries"),
    ("Enchantments", "get_top_enchantments"),
    ("Artifacts", "get_top_artifacts"),
    ("Mana Artifacts", "get_top_mana_artifacts"),
    ("Planeswalkers", "get_top_planeswalkers"),
    ("Utility Lands", "get_top_utility_lands"),
    ("Lands", "get_top_lands"),
]
card_data = {
    label: get_card_data(getattr(edhrec, method_name), commander)
    for label, method_name in category_methods
}

# Report what came back; a falsy result (None or empty) is shown as missing
for category, cards in card_data.items():
    print(f"\n=== Top {category} ===")
    print(cards if cards else "No data retrieved")


=== Top High Synergy Cards ===
{'High Synergy Cards': [{'name': 'Miirym, Sentinel Wyrm', 'sanitized': 'miirym-sentinel-wyrm', 'sanitized_wo': 'miirym-sentinel-wyrm', 'url': '/cards/miirym-sentinel-wyrm', 'synergy': 0.72, 'inclusion': 22481, 'label': '83% of 27155 decks\n+72% synergy', 'num_decks': 22481, 'potential_decks': 27155}, {'name': 'Dragon Tempest', 'sanitized': 'dragon-tempest', 'sanitized_wo': 'dragon-tempest', 'url': '/cards/dragon-tempest', 'synergy': 0.7, 'inclusion': 22077, 'label': '79% of 28111 decks\n+70% synergy', 'num_decks': 22077, 'potential_decks': 28111}, {'name': 'Crux of Fate', 'sanitized': 'crux-of-fate', 'sanitized_wo': 'crux-of-fate', 'url': '/cards/crux-of-fate', 'synergy': 0.67, 'inclusion': 21522, 'label': '77% of 28111 decks\n+67% synergy', 'num_decks': 21522, 'potential_decks': 28111}, {'name': 'Atarka, World Render', 'sanitized': 'atarka-world-render', 'sanitized_wo': 'atarka-world-render', 'url': '/cards/atarka-world-render', 'synergy': 0.63, 'inclus

In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time

def _parse_commander_cards(soup):
    """Extract one row per commander card from a parsed EDHREC page.

    Each card is parsed independently: a card that lacks its name or label
    element, or whose label text does not split into a ``Rank <n>`` line
    followed by a ``<n> decks`` line, is reported and skipped instead of
    aborting the whole scrape.

    Args:
        soup: ``BeautifulSoup`` document built from the page source.

    Returns:
        ``(rows, skipped)``: ``rows`` is a list of dicts with keys ``Rank``,
        ``Commander`` and ``Deck_Count`` (both counts as ints); ``skipped`` is
        the number of cards that could not be parsed.
    """
    rows = []
    skipped = 0
    # NOTE(review): these class names look auto-generated (hash-like suffix)
    # and may change when the site is rebuilt - confirm if nothing is found.
    for card in soup.find_all('div', class_='Card_container__Ng56K'):
        try:
            name = card.find('span', class_='Card_name__Mpa7S').text.strip()
            label = card.find('div', class_='CardLabel_label__iAM7T').text.strip()
            rank = int(label.split('Rank ')[1].split('\n')[0])
            deck_count = int(label.split('\n')[1].replace(' decks', '').replace(',', ''))
        except (AttributeError, IndexError, ValueError) as parse_error:
            # AttributeError: element missing (find() returned None);
            # IndexError / ValueError: label text not in the expected shape.
            skipped += 1
            print(f"Skipping unparseable commander card: {parse_error}")
            continue

        rows.append({
            'Rank': rank,
            'Commander': name,
            'Deck_Count': deck_count
        })
    return rows, skipped


def scrape_edhrec_commanders():
    """Scrape the EDHREC commanders page into a DataFrame.

    Opens https://edhrec.com/commanders in Chrome, repeatedly scrolls to the
    bottom and clicks the "Load More" button until the button can no longer
    be found or the number of commander cards stops growing, then parses
    every card on the page.

    Returns:
        A DataFrame sorted by ``Rank`` with columns ``Rank`` (int),
        ``Commander`` (str) and ``Deck_Count`` (str, formatted with thousands
        separators), or ``None`` if scraping failed or no card could be
        parsed. The browser is always closed before returning.
    """
    # Local import: the surrounding cell does not import selenium's exceptions.
    from selenium.common.exceptions import TimeoutException

    # Setup Selenium webdriver (Chrome)
    driver = webdriver.Chrome()
    url = "https://edhrec.com/commanders"

    try:
        # Load the page
        print("Loading initial page...")
        driver.get(url)
        time.sleep(3)  # Initial load wait

        last_commander_count = 0

        # Keep clicking "Load More" until no new commanders are loaded
        while True:
            try:
                # Scroll to bottom of page
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(2)  # Wait for scroll to complete
                print("Scrolled to bottom of page...")

                # Try to find the Load More button
                try:
                    print("Looking for Load More button...")
                    load_more = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.XPATH, "//button[contains(text(), 'Load More')]"))
                    )
                    print("Found Load More button!")

                    # Scroll button into view
                    print("Scrolling button into view...")
                    driver.execute_script("arguments[0].scrollIntoView(true);", load_more)
                    time.sleep(1)

                    # Click via JavaScript rather than a native click
                    print("Attempting to click button...")
                    driver.execute_script("arguments[0].click();", load_more)
                    print("Button clicked!")
                    time.sleep(2)  # Wait for new content to load

                except TimeoutException:
                    # The wait expiring is the normal way pagination ends:
                    # there is no further button to click.
                    print("No Load More button found, finishing...")
                    break
                except Exception as button_error:
                    print(f"Button interaction error: {button_error}")
                    break

                # Get current commander count
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                current_commanders = soup.find_all('div', class_='Card_container__Ng56K')
                current_count = len(current_commanders)

                print(f"Current commander count: {current_count}")
                print(f"Previous commander count: {last_commander_count}")

                # If no new commanders were loaded, break the loop
                if current_count == last_commander_count:
                    print("No new commanders loaded, finishing...")
                    break

                print(f"Loaded {current_count} commanders...")
                last_commander_count = current_count

            except Exception as e:
                print(f"Loop iteration error: {e}")
                break

        # Parse all commanders
        print("\nParsing final commander list...")
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        commanders_data, skipped = _parse_commander_cards(soup)
        if skipped:
            print(f"Skipped {skipped} commander card(s) that could not be parsed")

        # Sorting an empty frame by 'Rank' would raise KeyError; say what
        # actually happened instead.
        if not commanders_data:
            print("No commanders could be parsed from the page.")
            return None

        # Create and format DataFrame
        df = pd.DataFrame(commanders_data)
        df = df.sort_values('Rank')
        df['Deck_Count'] = df['Deck_Count'].apply(lambda x: f"{x:,}")

        return df

    except Exception as e:
        print(f"Major error in scraping process: {e}")
        return None

    finally:
        print("Closing browser...")
        driver.quit()

# Kick off the scrape; the function returns None when it fails
print("Starting commander scrape...")
df = scrape_edhrec_commanders()

if df is not None:
    # Summary line
    print(f"\nTotal Commanders found: {len(df)}")

    # Ten best-ranked commanders (df is already sorted by Rank)
    print("\nTop 10 Most Popular Commanders:")
    print("=" * 60)
    top_ten = df.head(10)
    for cmd_rank, cmd_name, cmd_decks in zip(
        top_ten['Rank'], top_ten['Commander'], top_ten['Deck_Count']
    ):
        print(f"#{cmd_rank}: {cmd_name}")
        print(f"Deck Count: {cmd_decks}")
        print("-" * 40)

    # Persist the full table.
    # NOTE(review): this writes into the current working directory, while other
    # cells read 'edhrec_commanders_complete.csv' from ../scripts via csv_path -
    # confirm both refer to the same file.
    df.to_csv('edhrec_commanders_complete.csv', index=False)
    print("\nFull data saved to 'edhrec_commanders_complete.csv'")

Starting commander scrape...
Loading initial page...
Loop iteration error: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=131.0.6778.205)
Stacktrace:
	GetHandleVerifier [0x00007FF6238E80D5+2992373]
	(No symbol) [0x00007FF62357BFD0]
	(No symbol) [0x00007FF62341590A]
	(No symbol) [0x00007FF6233EF4F5]
	(No symbol) [0x00007FF6234963A7]
	(No symbol) [0x00007FF6234AEE72]
	(No symbol) [0x00007FF62348F113]
	(No symbol) [0x00007FF62345A918]
	(No symbol) [0x00007FF62345BA81]
	GetHandleVerifier [0x00007FF623946A2D+3379789]
	GetHandleVerifier [0x00007FF62395C32D+3468109]
	GetHandleVerifier [0x00007FF623950043+3418211]
	GetHandleVerifier [0x00007FF6236DC78B+847787]
	(No symbol) [0x00007FF62358757F]
	(No symbol) [0x00007FF623582FC4]
	(No symbol) [0x00007FF62358315D]
	(No symbol) [0x00007FF623572979]
	BaseThreadInitThunk [0x00007FF8AF46259D+29]
	RtlUserThreadStart [0x00007FF8B124AF38+40]


Parsing final commander list...
Major erro

In [6]:
from pyedhrec import EDHRec
import json

# Client plus the commander whose raw payloads are dumped below
commander = "The Ur-Dragon"
edhrec = EDHRec()

# Card details -> card_details.json (working directory, 2-space indent)
details = edhrec.get_card_details(commander)
with open('card_details.json', 'w') as details_file:
    json.dump(details, details_file, indent=2)
print("Card details saved to 'card_details.json'")

# Full commander payload -> commander_data.json (same format)
commander_data = edhrec.get_commander_data(commander)
with open('commander_data.json', 'w') as commander_file:
    json.dump(commander_data, commander_file, indent=2)
print("Commander data saved to 'commander_data.json'")

Card details saved to 'card_details.json'
Commander data saved to 'commander_data.json'


In [7]:
import json
import pandas as pd
from pyedhrec import EDHRec
import time
import os

# Locate the scraped commanders CSV relative to the kernel's working directory.
# (The original took the dirname of the absolute path of the *string*
# '__file__', which is the current working directory.)
notebook_dir = os.getcwd()
csv_path = os.path.join(notebook_dir, '..', 'scripts', 'edhrec_commanders_complete.csv')

# Read the CSV and keep the first ten commander names for quick experiments
df = pd.read_csv(csv_path)
top_10_commanders = df['Commander'].head(10).tolist()

# EDHREC client shared by the cells that follow
edhrec = EDHRec()




def extract_tcgplayer_price(raw_data):
    """Return the TCGPlayer price nested inside a commander payload.

    Walks ``container -> json_dict -> card -> prices -> tcgplayer`` and reads
    ``price`` from the last level. A key missing at any level yields ``None``.
    Any exception raised along the way (for example a level that is not a
    dict) is printed and also yields ``None``.
    """
    key_path = ('container', 'json_dict', 'card', 'prices', 'tcgplayer')
    try:
        node = raw_data
        for key in key_path:
            # A missing level becomes an empty dict so the walk can continue
            node = node.get(key, {})
        return node.get('price')
    except Exception as e:
        print(f"Error extracting price: {str(e)}")
        return None

# Smoke-test the extractor against one known commander
test_commander = "The Ur-Dragon"
raw_data = edhrec.get_commander_data(test_commander)
price = extract_tcgplayer_price(raw_data)

# Show what was extracted (prints "None" after the $ if no price was found)
print(f"\nTCGPlayer price for {test_commander}: ${price}")


TCGPlayer price for The Ur-Dragon: $25.47


In [8]:

def print_json_structure(data, indent=0, prefix=''):
    """Print an outline of a JSON-like object: keys and value type names only.

    Dicts list every key with the type name of its value and recurse into
    dict/list values one level deeper. Non-empty lists print the type of
    their first element as ``[type]`` and recurse into it if it is a
    dict/list; empty lists and scalars print nothing. ``prefix`` is prepended
    to lines printed by this call only (it is not passed to recursive calls).
    """
    pad = '  ' * indent

    if isinstance(data, dict):
        for key, value in data.items():
            print(pad + f"{prefix}{key}: {type(value).__name__}")
            if isinstance(value, (dict, list)):
                print_json_structure(value, indent + 1)
        return

    if not isinstance(data, list) or len(data) == 0:
        return

    # The first element stands in for the whole list
    sample = data[0]
    print(pad + f"{prefix}[{type(sample).__name__}]")
    if isinstance(sample, (dict, list)):
        print_json_structure(sample, indent + 1)

# Pull one commander's raw payload to inspect its shape
test_commander = "The Ur-Dragon"
raw_data = edhrec.get_commander_data(test_commander)

# Outline the payload: keys and type names, no values
print("\nJSON Structure Map:")
print_json_structure(raw_data)


JSON Structure Map:
creature: int
instant: int
sorcery: int
artifact: int
enchantment: int
battle: int
planeswalker: int
land: int
basic: int
nonbasic: int
archidekt: list
  [dict]
    c: str
    f: int
    q: int
    u: str
similar: list
  [dict]
    aetherhub_uri: str
    archidekt_uri: str
    color_identity: list
      [str]
    cmc: int
    deckstats_uri: str
    image_uris: list
      [dict]
        normal: str
        art_crop: str
    layout: str
    moxfield_uri: str
    mtggoldfish_uri: str
    name: str
    names: list
      [str]
    prices: dict
      cardhoarder: dict
        price: float
        url: str
      cardkingdom: dict
        price: float
        url: str
      cardmarket: dict
        price: float
        set: str
        url: str
      face2face: dict
        price: float
        url: str
      manapool: dict
        price: float
        slug: str
      mtgstocks: dict
        price: float
        url: str
      scg: dict
        price: float
        slug: s

In [9]:
import json
import pandas as pd  # Add this import
from pyedhrec import EDHRec
import time
import os


def extract_commander_data(commander_name, raw_data):
    """Flatten a raw commander payload into the fields this notebook keeps.

    Args:
        commander_name: Display name; stored as ``"name"`` in the result.
        raw_data: Dict-like payload. Card fields and card lists are read from
            ``raw_data['container']['json_dict']``; tribes are read from
            ``raw_data['panels']['tribelinks']`` when present.

    Returns:
        A dict with keys ``name``, ``sanitized``, ``color_identity``,
        ``tcgplayer_price``, ``deck_count``, ``rank``, ``salt_score``,
        ``tribes`` (list of ``{'name', 'count'}``) and ``card_groups``
        (header -> list of ``{'name', 'sanitized', 'synergy', 'inclusion',
        'num_decks'}``). Missing pieces come back as ``None`` / empty
        containers. Returns ``None`` (after printing the error) if the payload
        cannot be processed at all.
    """
    import re  # local import: the cell's import block does not provide re

    def _first_int(pattern, text):
        """Return the integer captured by ``pattern`` in ``text``, or None if absent."""
        found = re.search(pattern, text)
        return int(found.group(1).replace(',', '')) if found else None

    try:
        # ``or {}`` also covers keys that are present but hold None
        container = raw_data.get('container') or {}
        json_dict = container.get('json_dict') or {}
        card = json_dict.get('card') or {}

        # Get card lists (groups of cards); lists without a header are skipped
        card_groups = {}
        for cardlist in json_dict.get('cardlists') or []:
            header = cardlist.get('header')
            if header:
                cards = cardlist.get('cardviews') or []
                card_groups[header] = [
                    {
                        'name': c.get('name'),
                        'sanitized': c.get('sanitized'),
                        'synergy': c.get('synergy'),
                        'inclusion': c.get('inclusion'),
                        'num_decks': c.get('num_decks')
                    } for c in cards
                ]

        # Extract deck count and rank from a label such as
        # "28041 decks (0.554%)\nRank #2". The two numbers are parsed
        # independently (one missing does not lose the other) and thousands
        # separators are tolerated.
        label = card.get('label', '')
        deck_count = None
        rank = None
        if isinstance(label, str) and label:
            deck_count = _first_int(r'(\d[\d,]*)\s+decks?\b', label)
            rank = _first_int(r'Rank\s*#?\s*(\d[\d,]*)', label)

        # Get tribes from panels/tribelinks
        panels = raw_data.get('panels') or {}
        tribes = [
            {
                'name': tribe.get('value'),
                'count': tribe.get('count')
            } for tribe in panels.get('tribelinks') or []
        ]

        prices = card.get('prices') or {}
        tcgplayer = prices.get('tcgplayer') or {}

        return {
            "name": commander_name,
            "sanitized": card.get('sanitized'),
            "color_identity": card.get('color_identity', []),
            "tcgplayer_price": tcgplayer.get('price'),
            "deck_count": deck_count,
            "rank": rank,
            "salt_score": card.get('salt'),
            "tribes": tribes,
            "card_groups": card_groups
        }
    except Exception as e:
        print(f"Error processing {commander_name}: {str(e)}")
        return None

def sanitize_commander_name(name):
    """Convert a commander's display name into a lowercase, dash-separated slug.

    Steps, in order:
      1. lowercase the name;
      2. join partner pairs written as ``A // B`` with a single dash;
      3. strip diacritics from every accented letter (not only the
         u-circumflex, n-tilde and e-acute the original table listed);
      4. drop commas, periods, double quotes and apostrophes (straight or
         typographic), turn ``&`` into ``and``, and replace spaces with dashes.

    Args:
        name: Commander name as it appears in the scraped CSV.

    Returns:
        The sanitized string.
    """
    import unicodedata  # local import: not provided by the cell's import block

    # Convert to lowercase
    sanitized = name.lower()

    # Handle partner commanders (replace '//' with a dash)
    if '//' in sanitized:
        sanitized = '-'.join(part.strip() for part in sanitized.split('//'))

    # Split accented letters into base letter + combining mark, drop the
    # marks, then recompose so characters that merely decompose (without
    # carrying an accent) are left as they were.
    decomposed = unicodedata.normalize('NFD', sanitized)
    without_marks = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    sanitized = unicodedata.normalize('NFC', without_marks)

    # Punctuation and spaces
    replacements = str.maketrans({
        ',': '',
        "'": '',
        '\u2019': '',  # typographic apostrophe
        '.': '',
        '"': '',
        '&': 'and',
        ' ': '-',
    })
    return sanitized.translate(replacements)

# Tunables for this run
N_COMMANDERS = 50    # how many commanders to take from the top of the CSV
SAVE_EVERY = 50      # write a partial snapshot after this many commanders
REQUEST_DELAY = 1    # seconds to pause after every request, successful or not

# NOTE: csv_path, notebook_dir and edhrec are not assigned in this cell; they
# must already exist in the kernel (earlier cells define them).

# Load commander names. Despite its name, top_200_commanders holds
# N_COMMANDERS entries; the name is kept so other cells that use it still work.
df = pd.read_csv(csv_path)
top_200_commanders = df.head(N_COMMANDERS)['Commander'].tolist()

# Process all commanders with sanitized names
results = {}
total = len(top_200_commanders)

for idx, commander in enumerate(top_200_commanders, 1):
    sanitized_name = sanitize_commander_name(commander)
    print(f"Processing {commander} (sanitized: {sanitized_name})... ({idx}/{total})")

    try:
        raw_data = edhrec.get_commander_data(sanitized_name)
        results[commander] = extract_commander_data(commander, raw_data)
    except Exception as e:
        print(f"Error processing {commander}: {e}")
        results[commander] = None
    finally:
        # Be nice to the API - throttle after failures too, so a run of
        # errors does not turn into back-to-back requests.
        time.sleep(REQUEST_DELAY)

    # Save progress periodically (written to the current working directory)
    if idx % SAVE_EVERY == 0:
        print(f"Saving progress... ({idx}/{total})")
        with open('extracted_commander_data_partial.json', 'w') as f:
            json.dump(results, f, indent=2)

# Save final results
output_path = os.path.join(notebook_dir, '..', 'data', 'extracted_commander_data.json')
os.makedirs(os.path.dirname(output_path), exist_ok=True)

print("\nSaving final results...")
with open(output_path, 'w') as f:
    json.dump(results, f, indent=2)

print(f"\nData exported to: {output_path}")

# Print sample of first commander for verification
if top_200_commanders:
    first_commander = top_200_commanders[0]
    print(f"\nSample data for {first_commander}:")
    print(json.dumps(results[first_commander], indent=2))
else:
    # An empty CSV would otherwise raise IndexError on [0]
    print("\nNo commanders were loaded from the CSV; nothing to sample.")

Processing Atraxa, Praetors' Voice (sanitized: atraxa-praetors-voice)... (1/50)
Processing The Ur-Dragon (sanitized: the-ur-dragon)... (2/50)
Processing Yuriko, the Tiger's Shadow (sanitized: yuriko-the-tigers-shadow)... (3/50)
Processing Krenko, Mob Boss (sanitized: krenko-mob-boss)... (4/50)
Processing Lathril, Blade of the Elves (sanitized: lathril-blade-of-the-elves)... (5/50)
Processing Kenrith, the Returned King (sanitized: kenrith-the-returned-king)... (6/50)
Processing Sauron, the Dark Lord (sanitized: sauron-the-dark-lord)... (7/50)
Processing Edgar Markov (sanitized: edgar-markov)... (8/50)
Processing Miirym, Sentinel Wyrm (sanitized: miirym-sentinel-wyrm)... (9/50)
Processing Kaalia of the Vast (sanitized: kaalia-of-the-vast)... (10/50)
Processing Isshin, Two Heavens as One (sanitized: isshin-two-heavens-as-one)... (11/50)
Processing Gishath, Sun's Avatar (sanitized: gishath-suns-avatar)... (12/50)
Processing Wilhelt, the Rotcleaver (sanitized: wilhelt-the-rotcleaver)... (13