In [1]:
# Install packages we need
!pip install requests pandas



In [13]:
import requests
import json

# Test connection to Scryfall API
response = requests.get("https://api.scryfall.com/cards/search?q=is:commander")
print(f"Status Code: {response.status_code}")
print(f"Found {response.json()['total_cards']} commanders")

Status Code: 200
Found 2703 commanders


In [14]:
# Get a specific commander to understand the data structure
response = requests.get("https://api.scryfall.com/cards/named?exact=Atraxa, Praetors' Voice")
card_data = response.json()

# Pretty print the structure
print(json.dumps(card_data, indent=2)[:2000])  # First 2000 characters
print("...")
print(f"Total keys in card data: {len(card_data.keys())}")
print(f"Available keys: {list(card_data.keys())}")

{
  "object": "card",
  "id": "d0d33d52-3d28-4635-b985-51e126289259",
  "oracle_id": "7e6b9b59-cd68-4e3c-827b-38833c92d6eb",
  "multiverse_ids": [
    489863
  ],
  "mtgo_id": 82414,
  "tcgplayer_id": 214833,
  "cardmarket_id": 462559,
  "name": "Atraxa, Praetors' Voice",
  "lang": "en",
  "released_at": "2020-08-07",
  "uri": "https://api.scryfall.com/cards/d0d33d52-3d28-4635-b985-51e126289259",
  "scryfall_uri": "https://scryfall.com/card/2xm/190/atraxa-praetors-voice?utm_source=api",
  "layout": "normal",
  "highres_image": true,
  "image_status": "highres_scan",
  "image_uris": {
    "small": "https://cards.scryfall.io/small/front/d/0/d0d33d52-3d28-4635-b985-51e126289259.jpg?1599707796",
    "normal": "https://cards.scryfall.io/normal/front/d/0/d0d33d52-3d28-4635-b985-51e126289259.jpg?1599707796",
    "large": "https://cards.scryfall.io/large/front/d/0/d0d33d52-3d28-4635-b985-51e126289259.jpg?1599707796",
    "png": "https://cards.scryfall.io/png/front/d/0/d0d33d52-3d28-4635-b985-5

In [16]:
# Extract the fields we'll need for our recommender
important_fields = {
    'name': card_data.get('name'),
    'mana_cost': card_data.get('mana_cost'),
    'type_line': card_data.get('type_line'),
    'oracle_text': card_data.get('oracle_text'),
    'colors': card_data.get('colors'),
    'keywords': card_data.get('keywords'),
    'power': card_data.get('power'),
    'toughness': card_data.get('toughness'),
    'cmc': card_data.get('cmc')  # converted mana cost
}

for key, value in important_fields.items():
    print(f"{key}: {value}")

name: Atraxa, Praetors' Voice
mana_cost: {G}{W}{U}{B}
type_line: Legendary Creature — Phyrexian Angel Horror
oracle_text: Flying, vigilance, deathtouch, lifelink
At the beginning of your end step, proliferate. (Choose any number of permanents and/or players, then give each another counter of each kind already there.)
colors: ['B', 'G', 'U', 'W']
keywords: ['Deathtouch', 'Flying', 'Lifelink', 'Vigilance', 'Proliferate']
power: 4
toughness: 4
cmc: 4.0


In [17]:
import pandas as pd
import time

def get_affordable_cards():
    all_cards = []
    page = 1
    
    while True:
        # Search for cards under $5 that are legal in Commander
        url = f"https://api.scryfall.com/cards/search?q=legal:commander+usd<5&page={page}"
        response = requests.get(url)
        
        if response.status_code != 200:
            break
            
        data = response.json()
        all_cards.extend(data['data'])
        
        print(f"Page {page}: Got {len(data['data'])} cards. Total so far: {len(all_cards)}")
        
        if not data.get('has_more'):
            break
            
        page += 1
        time.sleep(0.03)  # Be nice to the API
        
        # Limit for testing - remove this later
        # if page > 25:  # Just get first 3 pages for now
        #    break
    
    return all_cards

# This might take a minute or two
cards_data = get_affordable_cards()
print(f"Total cards collected: {len(cards_data)}")

Page 1: Got 175 cards. Total so far: 175
Page 2: Got 175 cards. Total so far: 350
Page 3: Got 175 cards. Total so far: 525
Page 4: Got 175 cards. Total so far: 700
Page 5: Got 175 cards. Total so far: 875
Page 6: Got 175 cards. Total so far: 1050
Page 7: Got 175 cards. Total so far: 1225
Page 8: Got 175 cards. Total so far: 1400
Page 9: Got 175 cards. Total so far: 1575
Page 10: Got 175 cards. Total so far: 1750
Page 11: Got 175 cards. Total so far: 1925
Page 12: Got 175 cards. Total so far: 2100
Page 13: Got 175 cards. Total so far: 2275
Page 14: Got 175 cards. Total so far: 2450
Page 15: Got 175 cards. Total so far: 2625
Page 16: Got 175 cards. Total so far: 2800
Page 17: Got 175 cards. Total so far: 2975
Page 18: Got 175 cards. Total so far: 3150
Page 19: Got 175 cards. Total so far: 3325
Page 20: Got 175 cards. Total so far: 3500
Page 21: Got 175 cards. Total so far: 3675
Page 22: Got 175 cards. Total so far: 3850
Page 23: Got 175 cards. Total so far: 4025
Page 24: Got 175 cards. T

In [18]:
# Create DataFrame with the fields we need
df = pd.DataFrame(cards_data)

print(f"DataFrame shape: {df.shape}")
print(f"Columns available: {df.columns.tolist()}")

# Look at the first few cards
print("\nFirst 3 card names:")
print(df[['name', 'type_line', 'oracle_text']].head(3))

DataFrame shape: (16275, 80)
Columns available: ['object', 'id', 'oracle_id', 'multiverse_ids', 'mtgo_id', 'arena_id', 'tcgplayer_id', 'cardmarket_id', 'name', 'lang', 'released_at', 'uri', 'scryfall_uri', 'layout', 'highres_image', 'image_status', 'image_uris', 'mana_cost', 'cmc', 'type_line', 'oracle_text', 'colors', 'color_identity', 'keywords', 'legalities', 'games', 'reserved', 'game_changer', 'foil', 'nonfoil', 'finishes', 'oversized', 'promo', 'reprint', 'variation', 'set_id', 'set', 'set_name', 'set_type', 'set_uri', 'set_search_uri', 'scryfall_set_uri', 'rulings_uri', 'prints_search_uri', 'collector_number', 'digital', 'rarity', 'flavor_text', 'card_back_id', 'artist', 'artist_ids', 'illustration_id', 'border_color', 'frame', 'full_art', 'textless', 'booster', 'story_spotlight', 'edhrec_rank', 'preview', 'prices', 'related_uris', 'purchase_uris', 'power', 'toughness', 'all_parts', 'frame_effects', 'security_stamp', 'penny_rank', 'promo_types', 'produced_mana', 'mtgo_foil_id', 

In [19]:
# Remove cards without oracle text (lands, some artifacts)
df_clean = df[df['oracle_text'].notna()].copy()

# Add a simplified text column for ML processing
df_clean['combined_text'] = (
    df_clean['oracle_text'].fillna('') + ' ' + 
    df_clean['type_line'].fillna('') + ' ' +
    df_clean['keywords'].astype(str)
)

print(f"Cards with oracle text: {len(df_clean)}")
print(f"Sample combined text:")
print(df_clean['combined_text'].iloc[0][:200] + "...")

Cards with oracle text: 15839
Sample combined text:
Equipped creature gets +2/+2.
Equip {3} ({3}: Attach to target creature you control. Equip only as a sorcery.) Artifact — Equipment ['Equip']...


In [8]:
# Install our ML libraries
!pip install tensorflow scikit-learn matplotlib seaborn



In [9]:
# Install TensorFlow for Apple Silicon
!pip install tensorflow-macos tensorflow-metal scikit-learn matplotlib seaborn



In [10]:
# Test TensorFlow installation
import tensorflow as tf
import sklearn
import pandas as pd
import numpy as np

print(f"TensorFlow version: {tf.__version__}")
print(f"Scikit-learn version: {sklearn.__version__}")
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")
print("All libraries imported successfully!")

Matplotlib is building the font cache; this may take a moment.


TensorFlow version: 2.16.2
Scikit-learn version: 1.7.0
GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
All libraries imported successfully!


In [20]:
import requests
import pandas as pd
import time

# Rerun the data collection (this should be familiar now)
def get_affordable_cards(max_pages=2500):
    all_cards = []
    page = 1
    
    while True:
        url = f"https://api.scryfall.com/cards/search?q=legal:commander+usd<10&page={page}"
        response = requests.get(url)
        
        if response.status_code != 200:
            print(f"Error on page {page}: {response.status_code}")
            break
            
        data = response.json()
        all_cards.extend(data['data'])
        
        print(f"Page {page}: Got {len(data['data'])} cards. Total so far: {len(all_cards)}")
        
        if not data.get('has_more') or page > max_pages:
            break
            
        page += 1
        time.sleep(0.025)
    
    return all_cards

# Collect data and save it
print("Re-collecting data...")
cards_data = get_affordable_cards(2500)

Re-collecting data...
Page 1: Got 175 cards. Total so far: 175
Page 2: Got 175 cards. Total so far: 350
Page 3: Got 175 cards. Total so far: 525
Page 4: Got 175 cards. Total so far: 700
Page 5: Got 175 cards. Total so far: 875
Page 6: Got 175 cards. Total so far: 1050
Page 7: Got 175 cards. Total so far: 1225
Page 8: Got 175 cards. Total so far: 1400
Page 9: Got 175 cards. Total so far: 1575
Page 10: Got 175 cards. Total so far: 1750
Page 11: Got 175 cards. Total so far: 1925
Page 12: Got 175 cards. Total so far: 2100
Page 13: Got 175 cards. Total so far: 2275
Page 14: Got 175 cards. Total so far: 2450
Page 15: Got 175 cards. Total so far: 2625
Page 16: Got 175 cards. Total so far: 2800
Page 17: Got 175 cards. Total so far: 2975
Page 18: Got 175 cards. Total so far: 3150
Page 19: Got 175 cards. Total so far: 3325
Page 20: Got 175 cards. Total so far: 3500
Page 21: Got 175 cards. Total so far: 3675
Page 22: Got 175 cards. Total so far: 3850
Page 23: Got 175 cards. Total so far: 4025
Pag

OSError: Cannot save file into a non-existent directory: '../data'

In [22]:
# Save to file so we don't lose it again!
import os

# Create the data directory if it doesn't exist
os.makedirs('data', exist_ok=True)

df = pd.DataFrame(cards_data)
df.to_csv('data/mtg_cards_raw.csv', index=False)
print(f"Saved {len(cards_data)} cards to data/mtg_cards_raw.csv!")

(28342, 82)

In [25]:
# Also save just the fields we need for ML
df_clean = df[df['oracle_text'].notna()].copy()
df_clean['combined_text'] = (
    df_clean['oracle_text'].fillna('') + ' ' + 
    df_clean['type_line'].fillna('') + ' ' +
    df_clean['keywords'].astype(str)
)

df_clean.to_csv('data/mtg_cards_clean.csv', index=False)
print(f"Saved {len(df_clean)} cleaned cards!")

Saved 27623 cleaned cards!
