# LlamaIndex + Supabase with Supacrawler

- Supabase AI Overview: https://supabase.com/docs/guides/ai
- LlamaIndex integration: https://supabase.com/docs/guides/ai/integrations/llamaindex



In [None]:
import os
from supacrawler import SupacrawlerClient, ScrapeParams
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.core import Document, VectorStoreIndex, StorageContext

# --- Embedding backend ------------------------------------------------------
# True  -> local Hugging Face model named by HF_MODEL
# False -> OpenAI embeddings
USE_HF = True
HF_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'  # produces 384-dimensional vectors

# --- Connection settings and credentials ------------------------------------
# Each value is taken from the environment; the literal is only a placeholder
# fallback and must be replaced (or the variable exported) before running.
SUPACRAWLER_API_KEY = os.getenv('SUPACRAWLER_API_KEY', 'YOUR_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'YOUR_OPENAI_KEY')
DB_URL = os.getenv('DATABASE_URL', 'postgresql+psycopg://user:pass@host:5432/db')


In [None]:

# Fetch the target page as markdown through the Supacrawler API.
crawler = SupacrawlerClient(api_key=SUPACRAWLER_API_KEY)
_scrape_params = ScrapeParams(url='https://example.com', format='markdown')
scrape = crawler.scrape(_scrape_params)

# Build the embedding model selected by USE_HF. Each backend is imported
# inside its own branch, so only the chosen integration has to be installed.
if not USE_HF:
    # Hosted OpenAI embeddings, authenticated with OPENAI_API_KEY.
    from llama_index.embeddings.openai import OpenAIEmbedding

    embed_model = OpenAIEmbedding(
        model='text-embedding-3-small',
        api_key=OPENAI_API_KEY,
    )
else:
    # Local Hugging Face model.
    # Requires: pip install sentence-transformers (or transformers + torch)
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    embed_model = HuggingFaceEmbedding(model_name=HF_MODEL)

# Persist the vectors in Postgres through LlamaIndex's PGVectorStore.
#
# PGVectorStore.from_params takes the SQLAlchemy URL as `connection_string`
# and the table name as `table_name`; it has no `database_url` or
# `collection_name` keywords, so passing those raises TypeError.
#
# NOTE(review): the same URL is passed for the async engine so that it is not
# assembled from unset host/user/password parts. This assumes DB_URL names a
# driver usable in both sync and async mode (the default `postgresql+psycopg`
# scheme is) — confirm if DATABASE_URL points at a different driver.
store = PGVectorStore.from_params(
    connection_string=DB_URL,
    async_connection_string=DB_URL,
    table_name='llama_docs',
    # Must match the width of the vectors produced by the selected embedding
    # model: 384 for HF_MODEL, 1536 for the OpenAI model.
    embed_dim=384 if USE_HF else 1536,
)
ctx = StorageContext.from_defaults(vector_store=store)

# Wrap the scraped page in a single Document. The title is optional on the
# scrape result, so it falls back to None when the attribute is absent.
page_doc = Document(
    text=scrape.content,
    metadata={'url': scrape.url, 'title': getattr(scrape, 'title', None)},
)

# Embed the document with the chosen model and write it into the store.
index = VectorStoreIndex.from_documents(
    [page_doc],
    storage_context=ctx,
    embed_model=embed_model,
)

# Ask a single question over the freshly built index and print the answer.
# NOTE(review): no LLM is passed to as_query_engine(), so it presumably falls
# back to LlamaIndex's globally configured default — confirm that credentials
# for that LLM are available even when USE_HF is True.
question = 'What is this page about?'
qe = index.as_query_engine()
resp = qe.query(question)
print(resp)

