# Data Ingestion Code (From RiftCodex)

## Loading Supabase

In [1]:
import os
from dotenv import load_dotenv
from supabase import create_client, Client

# Load environment variables (python-dotenv reads them from a local .env file)
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or empty, naming the variable
            so the misconfiguration is obvious from the traceback.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# Build the shared client; credentials come only from the environment so they
# never appear in the notebook source or its outputs.
supabase: Client = create_client(
    supabase_url=_require_env("SUPABASE_URL"),
    supabase_key=_require_env("SUPABASE_SERVICE_ROLE_KEY"),
)

## Set Data Ingestion

In [2]:
import http.client

# Fetch the list of card sets from the RiftCodex API as JSON text.
conn = http.client.HTTPSConnection("api.riftcodex.com", timeout=30)
payload = ''
headers = {
  'Accept': 'application/json'
}
try:
    conn.request("GET", "/sets", payload, headers)
    res = conn.getresponse()
    raw_body = res.read()
finally:
    # Release the socket even when the request fails part-way.
    conn.close()

# Stop here on an HTTP error instead of handing an error body to json.loads later.
if res.status != 200:
    raise RuntimeError(f"GET /sets failed: {res.status} {res.reason}")

# `data` is the decoded response text consumed by the parsing cell below.
data = raw_body.decode("utf-8")

In [None]:
import json
# Parse the JSON text from the /sets request (`data`, set by the previous cell).
set_data = json.loads(data)
# Bare last expression: show the parsed payload as the cell output.
set_data

{'items': [{'id': '23d4057d-cf01-4133-97ca-e8b641b13ac9',
   'name': 'Origins: Proving Grounds',
   'set_id': 'OGS',
   'label': 'Proving Grounds',
   'card_count': 24,
   'tcgplayer_id': '24439',
   'cardmarket_id': '6289',
   'publish_date': '2025-10-31T00:00:00Z',
   'update_date': ''},
  {'id': '76bc5811-b56c-40ec-9343-c00a0785a9d5',
   'name': 'Spiritforged',
   'set_id': 'SFD',
   'label': 'SFD',
   'card_count': 280,
   'tcgplayer_id': '24519',
   'cardmarket_id': '',
   'publish_date': '2026-02-13T00:00:00Z',
   'update_date': ''},
  {'id': '0fe3414b-c530-4a4d-b86c-168b323e8532',
   'name': 'Origins',
   'set_id': 'OGN',
   'label': 'Origins',
   'card_count': 352,
   'tcgplayer_id': '24344',
   'cardmarket_id': '6286',
   'publish_date': '2025-10-31T00:00:00Z',
   'update_date': ''}],
 'total': 3,
 'page': 1,
 'size': 50,
 'pages': 1}

In [8]:
# Keep only the list of set records; the paging fields (total/page/size/pages) are dropped.
items = set_data["items"]
# Display the records as the cell output.
items

[{'id': '23d4057d-cf01-4133-97ca-e8b641b13ac9',
  'name': 'Origins: Proving Grounds',
  'set_id': 'OGS',
  'label': 'Proving Grounds',
  'card_count': 24,
  'tcgplayer_id': '24439',
  'cardmarket_id': '6289',
  'publish_date': '2025-10-31T00:00:00Z',
  'update_date': ''},
 {'id': '76bc5811-b56c-40ec-9343-c00a0785a9d5',
  'name': 'Spiritforged',
  'set_id': 'SFD',
  'label': 'SFD',
  'card_count': 280,
  'tcgplayer_id': '24519',
  'cardmarket_id': '',
  'publish_date': '2026-02-13T00:00:00Z',
  'update_date': ''},
 {'id': '0fe3414b-c530-4a4d-b86c-168b323e8532',
  'name': 'Origins',
  'set_id': 'OGN',
  'label': 'Origins',
  'card_count': 352,
  'tcgplayer_id': '24344',
  'cardmarket_id': '6286',
  'publish_date': '2025-10-31T00:00:00Z',
  'update_date': ''}]

In [11]:
# Project each API set record onto the column names used by the `set` table.
# Fields not listed here (tcgplayer_id, cardmarket_id, update_date) are dropped.
filtered_data = [{
    "id": item["id"],
    "set_name": item["name"],
    "set_id": item["set_id"],
    "set_label": item["label"],
    "card_count": item["card_count"],
    "set_publish_date": item["publish_date"]
} for item in items]

# Upsert instead of insert so re-running this cell refreshes existing rows
# rather than failing on rows that are already present.
# NOTE(review): upsert resolves conflicts on the table's primary key by default;
# presumably that is `id` here — confirm against the schema.
response = supabase.table('set').upsert(filtered_data).execute()

## Card Data Ingestion

In [2]:
import http.client
import json

headers = {
    'Accept': 'application/json'
}

# Accumulates the card records from every page of /cards.
all_cards = []
page = 1
total_pages = None

while True:
    # One short-lived connection per page, closed deterministically so no
    # sockets are left open between iterations.
    conn = http.client.HTTPSConnection("api.riftcodex.com", timeout=30)
    try:
        conn.request("GET", f"/cards?size=100&sort=public_code&page={page}", '', headers)
        res = conn.getresponse()
        data = res.read().decode("utf-8")
    finally:
        conn.close()

    # Surface HTTP errors directly instead of as a JSON/KeyError further down.
    if res.status != 200:
        raise RuntimeError(f"GET /cards page {page} failed: {res.status} {res.reason}")

    card_data = json.loads(data)

    # Add cards from this page ('items' holds the records in the paged response)
    page_items = card_data['items']
    all_cards.extend(page_items)

    # The page count is reported by the API; read it once from the first response
    if total_pages is None:
        total_pages = card_data.get('pages')

    print(f"Fetched page {page}/{total_pages}")

    # Stop on an empty page, or on the last page when the API reported a page
    # count. If 'pages' is missing, keep going until an empty page is returned
    # instead of failing on a None comparison.
    if not page_items or (total_pages is not None and page >= total_pages):
        break

    page += 1

print(f"Total cards fetched: {len(all_cards)}")

Fetched page 1/7
Fetched page 2/7
Fetched page 3/7
Fetched page 4/7
Fetched page 5/7
Fetched page 6/7
Fetched page 7/7
Total cards fetched: 656


In [16]:
# Spot-check a single card record to see the nested payload shape.
all_cards[30]

{'id': '3cbb577b-7a8e-41e1-9ab2-b3a2a00da64f',
 'name': 'Falling Star',
 'riftbound_id': 'ogn-029-298',
 'tcgplayer_id': '652801',
 'public_code': 'OGN-029/298',
 'collector_number': 29,
 'attributes': {'energy': 2, 'might': None, 'power': 2},
 'classification': {'type': 'Spell',
  'supertype': None,
  'rarity': 'Rare',
  'domain': ['Fury']},
 'text': {'rich': '<p>Do this twice:<br />Deal 3 to a unit. <em>(You can choose different units.)</em></p>',
  'plain': 'Do this twice:Deal 3 to a unit. (You can choose different units.)'},
 'set': {'set_id': 'OGN', 'label': 'Origins'},
 'media': {'image_url': 'https://cmsassets.rgpub.io/sanity/images/dsfx7636/game_data_live/9cf2d2e59e1bf839cdf5c2a77e95f5d1e871788f-744x1039.png',
  'artist': 'Kudos Productions',
  'accessibility_text': 'Riftbound Spell: Falling Star. Do this twice:\nDeal 3 to a unit. (You can choose different units.)'},
 'tags': [],
 'orientation': 'portrait',
 'metadata': {'clean_name': 'Falling Star',
  'alternate_art': False,
 

In [14]:
import requests
from pathlib import Path
from tqdm import tqdm
import time

session = requests.Session()
# Single handle for the target bucket, reused for every card.
bucket = supabase.storage.from_("card-images")

# For every card: download its source image, copy it into the "card-images"
# bucket (unless it is already there) and record the public URL on the card
# as card["new_image_url"]. Cards that fail end up with new_image_url = None.
for card in tqdm(all_cards):
    try:
        original_url = card["media"]["image_url"]

        # Download image
        response = session.get(original_url, timeout=20)
        response.raise_for_status()
        image_data = response.content

        # Extension: URL suffix (or .jpg) by default, overridden by the served content type
        content_type = response.headers.get('content-type', 'image/jpeg')
        ext = Path(original_url).suffix or '.jpg'
        if 'png' in content_type:
            ext = '.png'
        elif 'webp' in content_type:
            ext = '.webp'

        # Storage path "<set_id>/<riftbound_id without its leading '<set>-' prefix><ext>"
        set_id = card["set"]["set_id"]
        card_id = card["riftbound_id"][len(set_id)+1:]
        filename = f"{set_id}/{card_id}{ext}"

        # Record the public URL up front: it is the right value both when the
        # object already exists and when it is uploaded below.
        card["new_image_url"] = bucket.get_public_url(filename)

        # Skip the upload when the object is already publicly reachable.
        try:
            check = session.head(card["new_image_url"], timeout=5)
            if check.status_code == 200:
                continue  # File exists, skip to next card
        except requests.RequestException:
            # Best-effort probe only; fall through and let the upload decide.
            pass

        # Pause before each upload (presumably to stay under a rate limit).
        time.sleep(1)

        # Upload to Supabase
        try:
            bucket.upload(
                filename,
                image_data,
                file_options={"content-type": content_type}
            )
        except Exception as upload_error:
            # A 409 from the upload means the object is already in the bucket,
            # so the URL recorded above is valid and must be kept. Anything
            # else is a real failure and is handled by the outer handler.
            # NOTE(review): matching on the message text is fragile; switch to
            # the storage client's typed error/status if one is available.
            if '409' not in str(upload_error):
                raise

    except Exception as e:
        print(f"Failed for {card.get('name', 'unknown')}: {e}")
        card["new_image_url"] = None

100%|██████████| 656/656 [20:03<00:00,  1.83s/it]


In [17]:
import json

# Snapshot the fetched cards (including any new_image_url values) to disk as
# pretty-printed UTF-8 JSON, keeping non-ASCII characters unescaped.
with open('all_cards.json', 'w', encoding='utf-8') as out_file:
    serialized_cards = json.dumps(all_cards, indent=2, ensure_ascii=False)
    out_file.write(serialized_cards)

card_total = len(all_cards)
print(f"Saved {card_total} cards to all_cards.json")

Saved 656 cards to all_cards.json


In [5]:
# Inspect the media sub-object (image URL, artist, accessibility text) of the first card.
all_cards[0]["media"]

{'image_url': 'https://cmsassets.rgpub.io/sanity/images/dsfx7636/game_data_live/15ed971e4029a92b362a81ccadf309fb81e40b81-744x1039.png',
 'artist': 'Envar Studio',
 'accessibility_text': 'Riftbound Unit: Blazing Scorcher. [Accelerate] (You may pay [1][C] as an additional cost to have me enter ready.)'}

In [20]:
# Target column -> key path inside a card payload. Entry order fixes both the
# key order of each row and the order in which the payload is read.
_CARD_COLUMN_PATHS = {
    "id": ("id",),
    "set_id": ("set", "set_id"),
    "card_number": ("collector_number",),
    "card_id": ("riftbound_id",),
    "public_code": ("public_code",),
    "card_name": ("name",),
    "attr_energy": ("attributes", "energy"),
    "attr_power": ("attributes", "power"),
    "attr_might": ("attributes", "might"),
    "card_type": ("classification", "type"),
    "card_supertype": ("classification", "supertype"),
    "card_rarity": ("classification", "rarity"),
    "card_domain": ("classification", "domain"),
    "text_rich": ("text", "rich"),
    "text_plain": ("text", "plain"),
    "card_artist": ("media", "artist"),
    "card_tags": ("tags",),
    "orientation": ("orientation",),
    "alternate_art": ("metadata", "alternate_art"),
    "overnumbered": ("metadata", "overnumbered"),
    "signature": ("metadata", "signature"),
    "card_image_url": ("new_image_url",),
}


def _lookup(record, key_path):
    """Follow `key_path` through nested mappings using strict indexing.

    Raises KeyError when a key along the path is absent, exactly like
    chained `record[a][b]` indexing.
    """
    value = record
    for key in key_path:
        value = value[key]
    return value


# One flat row per card, ready to be written to the `card` table.
filtered_card_data = [
    {column: _lookup(card, key_path) for column, key_path in _CARD_COLUMN_PATHS.items()}
    for card in all_cards
]

In [22]:
# Upsert instead of insert so this cell can be re-run without failing on card
# rows that were already written by an earlier run.
# NOTE(review): upsert resolves conflicts on the table's primary key by default;
# presumably that is `id` on `card` — confirm against the schema.
response = supabase.table('card').upsert(filtered_card_data).execute()
