First, let's start with a basic scrape:

In [7]:
# Supacrawler Python SDK - Scrape Examples
import os
from dotenv import load_dotenv
from supacrawler import SupacrawlerClient
from supacrawler.scraper_client.models import GetV1ScrapeFormat

# Load variables from a .env file into the process environment.
load_dotenv()

# Read the API key from the environment and build the client that the
# remaining cells reuse.
SUPACRAWLER_API_KEY = os.getenv("SUPACRAWLER_API_KEY")
client = SupacrawlerClient(api_key=SUPACRAWLER_API_KEY)

# Scrape the homepage as markdown and print the whole response object.
res_md = client.scrape("https://supacrawler.com", format="markdown")
print(res_md)

ScrapeResponse(success=True, url='https://supacrawler.com', metadata=ScrapeMetadata(status_code=200, depth=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, source_url='https://supacrawler.com', title='Supacrawler', description='One API to extract structured content, crawl entire sites, monitor changes, and capture pixel-perfect screenshots. Built-in headless browser, anti-blocking. Start for free.', language=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, canonical=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, favicon='https://supacrawler.com/favicon-16x16.png', og_title='Supacrawler', og_description='One API to extract structured content, crawl entire sites, monitor changes, and capture pixel-perfect screenshots. Built-in headless browser, anti-blocking. Start for free.', og_image='https://supacrawler.com/opengraph-image?9f80819947a5ecad', og_site_name=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, twitter_title='Supacr

In [8]:
# Show only the scraped markdown text carried by the response.
res_md.content

"# Scrape Everything\n[Get 500 credits for free](/signup) [Get Started](/dashboard)\n\n## Deploy faster\nBuilt for your needs\nScrape, transform, and load data from any website.\nCustomize your data to fit your needs.\n\n## Developer-first\nThree powerful APIs\nExtract data from any website with our simple, reliable APIs. From single pages to entire websites, we handle the complexity so you can focus on building.\n```\n1// Scrape API - Extract clean content from any webpage\n2import { SupacrawlerClient } from '@supacrawler/js'\n3\n4const client = new SupacrawlerClient({ apiKey: process.env.SUPACRAWLER_API_KEY })\n5const result = await client.scrape({ url: 'https://example.com', format: 'markdown' })\n6console.log(result) // Clean markdown content\n```\n\n"

A good example of a website that won't work unless you set `render_js=True` is `https://ai.google.dev/gemini-api/docs`:

In [10]:
# Without JS rendering this scrape is expected to fail: Google redirects the
# request to a Google-account sign-in page. Catch the error and log it.
#
# Reset `res_md` first: if the call raises, the name would otherwise still
# point at the earlier supacrawler.com response, and any later display of
# `res_md` would show that stale result as if it belonged to this request.
res_md = None
try:
    res_md = client.scrape("https://ai.google.dev/gemini-api/docs", format="markdown")
except Exception as e:
    print("This will not work for google with the following error:\n", e)
    print("---")

In [11]:
# Display whatever `res_md` currently holds after the scrape attempt above.
res_md

In [13]:
# Same markdown scrape, this time with JavaScript rendering switched on.
# Note: the target here is supacrawler.com.
res_md_rendered = client.scrape(
    "https://supacrawler.com",
    format="markdown",
    render_js=True,
)

# Print the full response first, then its content and metadata separately.
print("This will work:\n", res_md_rendered)
print("Content:\n", res_md_rendered.content)
print("Metadata:\n", res_md_rendered.metadata)

This will work:
 ScrapeResponse(success=True, url='https://supacrawler.com', metadata=ScrapeMetadata(status_code=200, depth=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, source_url='https://supacrawler.com', title='Supacrawler', description='One API to extract structured content, crawl entire sites, monitor changes, and capture pixel-perfect screenshots. Built-in headless browser, anti-blocking. Start for free.', language=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, canonical=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, favicon='https://supacrawler.com/favicon-16x16.png', og_title='Supacrawler', og_description='One API to extract structured content, crawl entire sites, monitor changes, and capture pixel-perfect screenshots. Built-in headless browser, anti-blocking. Start for free.', og_image='https://supacrawler.com/opengraph-image?9f80819947a5ecad', og_site_name=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, twit

In [None]:
import json

# Convert the metadata object to a plain dict, then pretty-print it as JSON
# with a two-space indent.
metadata_dict = res_md_rendered.metadata.to_dict()
metadata_json = json.dumps(metadata_dict, indent=2)
print(metadata_json)

{
  "status_code": 200,
  "source_url": "https://supacrawler.com",
  "title": "Supacrawler",
  "description": "One API to extract structured content, crawl entire sites, monitor changes, and capture pixel-perfect screenshots. Built-in headless browser, anti-blocking. Start for free.",
  "favicon": "https://supacrawler.com/favicon-16x16.png",
  "og_title": "Supacrawler",
  "og_description": "One API to extract structured content, crawl entire sites, monitor changes, and capture pixel-perfect screenshots. Built-in headless browser, anti-blocking. Start for free.",
  "og_image": "https://supacrawler.com/opengraph-image?9f80819947a5ecad",
  "twitter_title": "Supacrawler",
  "twitter_description": "One API to extract structured content, crawl entire sites, monitor changes, and capture pixel-perfect screenshots. Built-in headless browser, anti-blocking. Start for free.",
  "twitter_image": "https://supacrawler.com/opengraph-image"
}


You can also extract all the links discovered from a starting URL:

In [18]:
# Link mapping: request format="links" for the start URL, passing depth=2
# and max_links=10.
# NOTE(review): the recorded `res_links.links` output lists 16 URLs even
# though max_links=10 was passed — confirm how the API applies this limit.
res_links = client.scrape(
    "https://supacrawler.com",
    format="links",
    depth=2,
    max_links=10,
)
print(res_links)

ScrapeResponse(success=True, url='https://supacrawler.com', metadata=ScrapeMetadata(status_code=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, depth=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, source_url=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, title=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, description=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, language=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, canonical=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, favicon=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, og_title=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, og_description=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, og_image=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, og_site_name=<supacrawler.scraper_client.types.Unset object at 0x1151f9dd0>, twitter_title=<supacrawler.s

In [None]:
# Display the list of URLs returned by the links scrape above.
res_links.links

['https://supacrawler.com/terms-of-service',
 'https://supacrawler.com/signup',
 'https://supacrawler.com',
 'https://supacrawler.com/pricing',
 'https://supacrawler.com/blog/how-to-crawl-blogs-and-docs',
 'https://supacrawler.com/contact',
 'https://supacrawler.com/blog/your-first-web-scrape',
 'https://supacrawler.com/forgot-password',
 'https://supacrawler.com/dashboard/scrape',
 'https://supacrawler.com/dashboard',
 'https://supacrawler.com/work',
 'https://supacrawler.com/privacy-policy',
 'https://supacrawler.com/blog',
 'https://supacrawler.com/signin',
 'https://supacrawler.com/about',
 'https://supacrawler.com/blog/how-to-take-full-page-screenshots']