In [1]:
# Supacrawler Python SDK - Jobs Examples

from supacrawler import SupacrawlerClient
from supacrawler.scraper_client.models import CrawlCreateRequest
import os
from dotenv import load_dotenv

# Load settings from a .env file (if one is present) into the environment
load_dotenv()

# The API key comes from the environment so it is never written in the notebook
SUPACRAWLER_API_KEY = os.getenv("SUPACRAWLER_API_KEY")
client = SupacrawlerClient(api_key=SUPACRAWLER_API_KEY)

# Settings for the crawl job, passed to create_crawl_job as keyword arguments
crawl_options = {
    "url": "https://supabase.com/docs",
    "format": "markdown",
    "link_limit": 3,
    "depth": 1,
    "include_subdomains": False,
    "render_js": False,
}

# Submit the crawl job; the response carries the job_id used by later cells
job = client.create_crawl_job(**crawl_options)
job

CrawlCreateResponse(success=True, job_id='90d8b802-8647-4e1a-8eaa-31fed0760338', additional_properties={})

In [4]:
# Wait for the crawl job created earlier to complete, polling with a
# 3-second interval and a 300-second timeout
crawl_output = client.wait_for_crawl(
    job.job_id,
    interval_seconds=3.0,
    timeout_seconds=300.0,
)

# Show the finished job, including the crawled data
crawl_output

CrawlJob(success=True, job_id='90d8b802-8647-4e1a-8eaa-31fed0760338', status='completed', data=CrawlData(url='https://supabase.com/docs', pages={'https://supabase.com/docs': Page(markdown='# Supabase Documentation\nLearn how to get up and running with Supabase through tutorials, APIs and platform resources.\n\n## Getting Started\nSet up and connect a database in just a few minutes.\n[Start with Supabase AI prompts](/docs/guides/getting-started/ai-prompts)\n\n## Products\n- [Database\n\nSupabase provides a full Postgres database for every project with Realtime functionality, database backups, extensions, and more.](/docs/guides/database/overview)\n- [Auth\n\nAdd and manage email and password, passwordless, OAuth, and mobile logins to your project through a suite of identity providers and APIs.](/docs/guides/auth)\n- [Storage\n\nStore, organize, transform, and serve large files—fully integrated with your Postgres database with Row Level Security access policies.](/docs/guides/storage)\n-

In [12]:
# Display the crawled pages. In the recorded output this is a dict that maps
# each crawled URL to its Page object.
crawl_output.data.crawl_data

{'https://supabase.com/docs': Page(markdown='# Supabase Documentation\nLearn how to get up and running with Supabase through tutorials, APIs and platform resources.\n\n## Getting Started\nSet up and connect a database in just a few minutes.\n[Start with Supabase AI prompts](/docs/guides/getting-started/ai-prompts)\n\n## Products\n- [Database\n\nSupabase provides a full Postgres database for every project with Realtime functionality, database backups, extensions, and more.](/docs/guides/database/overview)\n- [Auth\n\nAdd and manage email and password, passwordless, OAuth, and mobile logins to your project through a suite of identity providers and APIs.](/docs/guides/auth)\n- [Storage\n\nStore, organize, transform, and serve large files—fully integrated with your Postgres database with Row Level Security access policies.](/docs/guides/storage)\n- [Realtime\n\nListen to database changes, store and sync user states across clients, broadcast data to clients subscribed to a channel, and more

In [7]:
# Get the first crawled page.
# crawl_data is a dict keyed by page URL, so positional indexing
# (crawl_data[0]) raises KeyError; take the first value in insertion order.
crawl_data = crawl_output.data.crawl_data

# Fail with a clear message if the crawl produced no pages at all
assert crawl_data, "crawl returned no pages"

# NOTE: despite its name, first_url holds the Page object (not the URL string);
# the name is kept because the following cell refers to it.
first_url = next(iter(crawl_data.values()))

# Display the markdown content
first_url.markdown

'# Supabase Documentation\nLearn how to get up and running with Supabase through tutorials, APIs and platform resources.\n\n## Getting Started\nSet up and connect a database in just a few minutes.\n[Start with Supabase AI prompts](/docs/guides/getting-started/ai-prompts)\n\n## Products\n- [Database\n\nSupabase provides a full Postgres database for every project with Realtime functionality, database backups, extensions, and more.](/docs/guides/database/overview)\n- [Auth\n\nAdd and manage email and password, passwordless, OAuth, and mobile logins to your project through a suite of identity providers and APIs.](/docs/guides/auth)\n- [Storage\n\nStore, organize, transform, and serve large files—fully integrated with your Postgres database with Row Level Security access policies.](/docs/guides/storage)\n- [Realtime\n\nListen to database changes, store and sync user states across clients, broadcast data to clients subscribed to a channel, and more.](/docs/guides/realtime)\n- [Edge Functions

In [10]:
# Display the first page's metadata. In the recorded output to_json() yields a
# plain dict with the keys title, status_code, description, language and robots.
first_url.metadata.to_json()

{'title': 'Supabase Docs',
 'status_code': 200,
 'description': None,
 'language': None,
 'robots': None}