First let's start with a basic scrape:

In [3]:
# Supacrawler Python SDK - Scrape Examples
import os
from dotenv import load_dotenv
from supacrawler import SupacrawlerClient
from supacrawler.scraper_client.models import GetV1ScrapeFormat

load_dotenv()

SUPACRAWLER_API_KEY=os.environ.get("SUPACRAWLER_API_KEY")

client = SupacrawlerClient(api_key="", base_url=l")

# Basic markdown scrape
res_md = client.scrape("https://supabase.com", format="markdown")
print(res_md)
print("Scrape content:\n", res_md.markdown)
print("Metadata:\n", res_md.metadata.to_json())

TypeError: SupacrawlerClient.__init__() missing 1 required positional argument: 'api_key'

A good example of a website that won't work unless you set `render_js=True` is `https://ai.google.dev/gemini-api/docs`:

In [None]:
# This will not work for google with the following error:
# This happens because google redirects you to authenticate with google account
res_md = client.scrape("https://ai.google.dev/gemini-api/docs", format="markdown")

Error(success=False, error='failed to scrape URL after 3 attempts: fetch: Get "https://accounts.google.com/o/oauth2/v2/auth?client_id=157101835696-ooapojlodmuabs2do2vuhhnf90bccmoi.apps.googleusercontent.com&redirect_uri=https%3A%2F%2Fai.google.dev%2Foauth2callback&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdeveloperprofiles+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdeveloperprofiles.award+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fgoogledevelopers+openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.profile&access_type=online&response_type=code&state=%7B%22csrf_token%22%3A+%2267f09905c0960ba629e12739581f7c4541b618ab0be8acf0820419ce729b1e73%22%2C+%22return_url%22%3A+%22https%3A%2F%2Fai.google.dev%2Fgemini-api%2Fdocs%22%7D&prompt=none&auto_signin=True": stopped after 10 redirects', additional_properties={})

In [9]:
# Rendering JS should work!
res_md_rendered = client.scrape("https://ai.google.dev/gemini-api/docs", format="markdown", render_js=True)
print("This will work:\n", res_md_rendered)
print("Content:\n", res_md_rendered.markdown)
print("Metadata:\n", res_md_rendered.metadata)

This will work:
 markdown='# Gemini Developer API\n[Get a Gemini API Key](https://aistudio.google.com/apikey)\nGet a Gemini API key and make your first API request in minutes.\n\n### Python\n```\nfrom google import genai\nclient = genai.Client()\nresponse = client.models.generate_content(\nmodel="gemini-2.5-flash",\ncontents="Explain how AI works in a few words",\n)\nprint(response.text)\n```\n\n### JavaScript\n```\nimport { GoogleGenAI } from "@google/genai";\nconst ai = new GoogleGenAI({});\nasync function main() {\nconst response = await ai.models.generateContent({\nmodel: "gemini-2.5-flash",\ncontents: "Explain how AI works in a few words",\n});\nconsole.log(response.text);\n}\nawait main();\n```\n\n### Go\n```\npackage main\nimport (\n"context"\n"fmt"\n"log"\n"google.golang.org/genai"\n)\nfunc main() {\nctx := context.Background()\nclient, err := genai.NewClient(ctx, nil)\nif err != nil {\nlog.Fatal(err)\n}\nresult, err := client.Models.GenerateContent(\nctx,\n"gemini-2.5-flash",\

In [10]:
res_md_rendered.metadata.to_json()

{'title': 'Gemini API &nbsp;|&nbsp; Google AI for Developers',
 'status_code': 200,
 'description': 'Gemini Developer API Docs and API Reference',
 'language': None,
 'robots': None}

You can also extract all the existing links within a starting url:

In [5]:
# Links mapping with depth and max_links
res_links = client.scrape("https://supacrawler.com", format="links", depth=2, max_links=10)
print(res_links)

markdown=None html=None links=['https://supacrawler.com/pricing', 'https://supacrawler.com/about', 'https://supacrawler.com/contact', 'https://supacrawler.com/terms-of-service', 'https://supacrawler.com/work', 'https://supacrawler.com', 'https://supacrawler.com/dashboard/scrape', 'https://supacrawler.com/signin', 'https://supacrawler.com/privacy-policy', 'https://supacrawler.com/blog/your-first-web-scrape', 'https://supacrawler.com/blog/how-to-take-full-page-screenshots', 'https://supacrawler.com/blog/how-to-crawl-blogs-and-docs', 'https://supacrawler.com/forgot-password', 'https://supacrawler.com/signup', 'https://supacrawler.com/blog', 'https://supacrawler.com/dashboard'] metadata=None
