# Firecrawl vs SupaCrawler: Complete Performance Comparison

This notebook compares Firecrawl and SupaCrawler for both single-page scraping and multi-page crawling. We focus on performance, cost efficiency, and API design differences.

In [None]:
# Installation requirements
# !pip install firecrawl-py
# !pip install supacrawler

In [22]:
import time
import os
from supacrawler import SupacrawlerClient
from firecrawl import FirecrawlApp
from dotenv import load_dotenv

load_dotenv()

True

## Part 1: Single Page Scraping Comparison

In [71]:
def firecrawl_scrape(url: str):
    start = time.time()
    client = Firecrawl(api_key=os.environ.get("FIRECRAWL_API_KEY"))

    try:
        # Adjust based on what your SDK supports — here we use minimal supported args
        doc = client.scrape(url, formats=["markdown", "html"], max_age=0)

        # `doc` is Document-like
        title = doc.metadata.title if doc.metadata and hasattr(doc.metadata, "title") else "No title"
        markdown_content = doc.markdown or ""
        if len(markdown_content) > 200:
            content_preview = markdown_content[:200] + "..."
        else:
            content_preview = markdown_content or "No content"

        return {
            "title": title,
            "content": content_preview,
            "time": time.time() - start,
        }

    except Exception as e:
        return {
            "error": str(e),
            "time": time.time() - start,
        }


# Test code
test_url = 'https://example.com'

print("Single Page Scraping Comparison")
print("=" * 35)
print(f"Test URL: {test_url}")
print()

print("Firecrawl:")
firecrawl_result = firecrawl_scrape(test_url)
if 'error' in firecrawl_result:
    print(f"Error: {firecrawl_result['error']}")
else:
    print(f"Title: {firecrawl_result['title']}")
    print(f"Time: {firecrawl_result['time']:.2f}s")
print(f"API design: {firecrawl_result.get('api_design')}")
print()


Single Page Scraping Comparison
Test URL: https://example.com

Firecrawl:
Title: Example Domain
Time: 1.32s
API design: None



In [None]:
# Test code
test_url = 'https://example.com'
def supacrawler_scrape(url):
    """Single page scraping with SupaCrawler"""
    start = time.time()
    
    client = SupacrawlerClient(api_key=os.environ.get('SUPACRAWLER_API_KEY'))
    response = client.scrape(url, format='markdown', fresh=True)
    
    title = response.metadata.title if response.metadata else 'No title'
    content = response.content if response.content else "No content"
    
    return {
        'title': title,
        'content': content[:200] + "..." if len(content) > 200 else content,
        'metadata': response.metadata,
        'time': time.time() - start,
        'resource_usage': 'Zero local resources'
    }
    
print("SupaCrawler:")
sc_result = supacrawler_scrape(test_url)
print(f"Title: {sc_result['title']}")
print(f"Time: {sc_result['time']:.2f}s")
print()

if 'error' not in firecrawl_result and firecrawl_result['time'] > 0 and sc_result['time'] > 0:
    ratio = firecrawl_result['time'] / sc_result['time']
    print(f"Performance: SupaCrawler is {ratio:.1f}x faster")

SupaCrawler:
Title: Example Domain
Time: 1.04s

Performance: SupaCrawler is 1.3x faster


## Part 2: Multi-Page Crawling Comparison

In [50]:
import os
import time
from firecrawl import Firecrawl

def firecrawl_crawl_minimal(start_url, max_pages=5):
    """Minimal Firecrawl v2 crawl; waits until crawl completes."""
    start_time = time.time()
    client = Firecrawl(api_key=os.environ.get("FIRECRAWL_API_KEY"))

    try:
        # Using the crawl waiter API
        crawl_status = client.crawl(
            url=start_url,
            limit=max_pages,
            scrape_options={
                "formats": ["markdown"],
                # omit only_main_content or leave default
            },
            poll_interval=0.1,   # seconds between status checks
            timeout=60          # total seconds before timeout
        )

        # If crawl returned an object with success flag
        if not getattr(crawl_status, "success", False):
            return {
                "pages_crawled": 0,
                "total_time": time.time() - start_time,
                "error": f"Crawl failed: {getattr(crawl_status, 'error', 'Unknown error')}"
            }

        data = getattr(crawl_status, "data", []) or []
        count = len(data)
        end_time = time.time()

        return {
            "pages_crawled": count,
            "total_time": end_time - start_time,
            "avg_time_per_page": (end_time - start_time) / count if count else 0,
            "pages": data
        }

    except Exception as e:
        return {
            "pages_crawled": 0,
            "total_time": time.time() - start_time,
            "error": str(e)
        }


In [44]:
def supacrawler_crawl(start_url, max_pages=5):
    """Built-in crawling with SupaCrawler (SDK-native usage)"""
    start_time = time.time()
    client = SupacrawlerClient(api_key=os.environ.get('SUPACRAWLER_API_KEY'))

    try:
        job = client.create_crawl_job(
            url=start_url,
            format="markdown",
            link_limit=max_pages,
            depth=3,
            include_subdomains=False,
            render_js=True,
            fresh=True # fresh never uses cached results
        )

        crawl_output = client.wait_for_crawl(
            job.job_id,
            interval_seconds=2.0,
            timeout_seconds=120.0
        )

        crawl_data = crawl_output.data.crawl_data
        end_time = time.time()

        return {
            "pages_crawled": len(crawl_data),
            "total_time": end_time - start_time,
            "avg_time_per_page": (end_time - start_time) / len(crawl_data) if crawl_data else 0,
            "crawl_data": crawl_data  # keep the native objects
        }

    except Exception as e:
        end_time = time.time()
        return {
            "pages_crawled": 0,
            "total_time": end_time - start_time,
            "error": str(e)
        }

In [51]:
# Test crawling
crawl_url = "https://docs.python.org"
max_pages = 5

print("Multi-Page Crawling Comparison")
print("=" * 35)
print(f"Test URL: {crawl_url}")
print(f"Max pages: {max_pages}")
print()

print("Firecrawl job-based crawling:")
firecrawl_crawl_result = firecrawl_crawl(crawl_url, max_pages)

if 'error' in firecrawl_crawl_result:
    print(f"Error: {firecrawl_crawl_result['error']}")
else:
    print(f"Pages crawled: {firecrawl_crawl_result['pages_crawled']}")
    print(f"Total time: {firecrawl_crawl_result['total_time']:.2f}s")
    print(f"Average per page: {firecrawl_crawl_result['avg_time_per_page']:.2f}s")

print(f"API design: Job creation → polling → results")
print(f"Cost model: Credit-based with usage limits")
print()

Multi-Page Crawling Comparison
Test URL: https://docs.python.org
Max pages: 5

Firecrawl job-based crawling:
Error: Crawl failed: Unknown
API design: Job creation → polling → results
Cost model: Credit-based with usage limits



In [75]:
import os
import time
from firecrawl import Firecrawl

# configure

firecrawl = Firecrawl(api_key=os.environ.get("FIRECRAWL_API_KEY"))
start_url = "https://docs.python.org"
limit = 5

print("Starting Firecrawl crawl test")
t0_firecrawl = time.time()

# do the crawl; waiter style
crawl_status = firecrawl.crawl(
    url=start_url,
    limit=limit,
    scrape_options={
        "formats": ["markdown", "html"],
        "only_main_content": True,
        'maxAge': 0,
    },
    poll_interval=0.1,   # seconds
    timeout=60          # seconds before giving up
)

t1_firecrawl = time.time()

print("Raw crawl_status object:", crawl_status)
print("Error field:", getattr(crawl_status, "error", None))
print("Data length:", len(getattr(crawl_status, "data", []) or []))
print(f"Total time: {t1_firecrawl - t0_firecrawl:.2f}s")

# If data exists, print a sample
if getattr(crawl_status, "success", False):
    data = getattr(crawl_status, "data", []) or []
    if data:
        first = data[0]
        print("First page metadata.title:", getattr(first.metadata, "title", None))
        print("First page markdown:", getattr(first, "markdown", "")[:200] + "...")


Starting Firecrawl crawl test
Error field: None
Data length: 5
Total time: 2.31s


In [None]:
import os
import time
from supacrawler import SupacrawlerClient 


client = SupacrawlerClient(api_key=os.environ.get("SUPACRAWLER_API_KEY"))
start_url = "https://docs.python.org"
max_pages = 5

print("Starting SupaCrawler crawl test")
t0_supacrawler = time.time()

try:
    # start crawl job
    job = client.create_crawl_job(
        url=start_url,
        format="markdown",
        link_limit=max_pages,
        depth=1,
        include_subdomains=False,
        render_js=True,
        fresh=True
    )
    print("Created job:", job.job_id)

    # wait for crawl to finish
    crawl_output = client.wait_for_crawl(
        job.job_id,
        interval_seconds=0.1,
        timeout_seconds=120.0
    )

    t1_supacrawler = time.time()
    crawl_data = crawl_output.data.crawl_data

    print("Raw crawl_output:", crawl_output)
    print("Pages crawled:", len(crawl_data))
    print(f"Total time: {t1_supacrawler - t0_supacrawler:.2f}s")

    if crawl_data:
        first = crawl_data[0]
        print("First page URL:", getattr(first, "url", None))
        print("First page content (snippet):", getattr(first, "content", "")[:200] + "...")

except Exception as e:
    t1_supacrawler = time.time()
    print("Error during crawl:", str(e))
    print(f"Total time: {t1_supacrawler - t0_supacrawler:.2f}s")

finally:
    client.close()


Starting SupaCrawler crawl test
Created job: a64aabd8-97d2-4140-b703-1138b82109c2
Raw crawl_output: success=True job_id='a64aabd8-97d2-4140-b703-1138b82109c2' status='completed' data=CrawlData(url='https://docs.python.org', pages={'https://docs.python.org': Page(markdown='# Python 3.13.7 documentation\nWelcome! This is the official documentation for Python 3.13.7.\n**Documentation sections:**\n[What\'s new in Python 3.13?](whatsnew/3.13.html)\nOr [all "What\'s new" documents since Python 2.0](whatsnew/index.html) [Tutorial](tutorial/index.html)\nStart here: a tour of Python\'s syntax and features\n[Library reference](library/index.html)\nStandard library and builtins\n[Language reference](reference/index.html)\nSyntax and language elements\n[Python setup and usage](using/index.html)\nHow to install, configure, and use Python\n[Python HOWTOs](howto/index.html)\nIn-depth topic manuals\n[Installing Python modules](installing/index.html)\nThird-party modules and PyPI.org\n[Distributing Pyt

In [77]:
print("\n=== Crawl Benchmark Summary ===")

# Firecrawl metrics
fc_pages = len(getattr(crawl_status, "data", []) or [])
fc_time = t1_firecrawl - t0_firecrawl
fc_avg_time = fc_time / fc_pages if fc_pages > 0 else float("inf")

print(f"Firecrawl -> Pages: {fc_pages}, "
      f"Total Time: {fc_time:.2f}s, Avg Time/Page: {fc_avg_time:.2f}s")

# SupaCrawler metrics
sc_pages = len(crawl_data) if "crawl_data" in locals() and crawl_data else 0
sc_time = t1_supacrawler - t0_supacrawler
sc_avg_time = sc_time / sc_pages if sc_pages > 0 else float("inf")

print(f"SupaCrawler -> Pages: {sc_pages}, "
      f"Total Time: {sc_time:.2f}s, Avg Time/Page: {sc_avg_time:.2f}s")

# Relative performance
if fc_pages > 0 and sc_pages > 0:
    speed_ratio = fc_avg_time / sc_avg_time
    print(f"\nPerformance: SupaCrawler is {speed_ratio:.1f}x faster per page "
          if speed_ratio > 1 else
          f"\nPerformance: Firecrawl is {1/speed_ratio:.1f}x faster per page")

# Cost model comparison (static, from docs)
print("\n=== Cost Comparison (100,000 requests/month) ===")
print("Firecrawl: $100/month for 100,000 requests")
print("SupaCrawler: $65/month for 100,000 requests")
print("Savings: 35% cost reduction with SupaCrawler")



=== Crawl Benchmark Summary ===
Firecrawl -> Pages: 5, Total Time: 2.31s, Avg Time/Page: 0.46s
SupaCrawler -> Pages: 6, Total Time: 1.94s, Avg Time/Page: 0.32s

Performance: SupaCrawler is 1.4x faster per page 

=== Cost Comparison (100,000 requests/month) ===
Firecrawl: $100/month for 100,000 requests
SupaCrawler: $65/month for 100,000 requests
Savings: 35% cost reduction with SupaCrawler
