In [1]:
!pip install -Uq nest_asyncio langchain openai lxml bs4 supabase tiktoken python-dotenv

In [22]:
from langchain.document_loaders.sitemap import SitemapLoader
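# Jupyter already runs an asyncio event loop; nest_asyncio patches asyncio so
# the loader can run its own loop inside it instead of raising an error.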
import nest_asyncio

nest_asyncio.apply()

# Load pages listed in one partition of Stripe's sitemap, restricted by
# filter_urls to URLs that match the /docs pattern.
# The pattern is a raw string so that `\.` reaches the regex engine untouched
# instead of being parsed as an (invalid) Python string escape; `/` needs no
# escaping in a Python regex, so the pattern matches the same URLs as before.
sitemap_loader = SitemapLoader(
    web_path="https://stripe.com/sitemap/partition-2.xml",
    filter_urls=[r"^https://stripe\.com/docs"],
)

# Set the loader's request rate (attribute name: requests_per_second).
sitemap_loader.requests_per_second = 2

In [23]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=500 and chunk_overlap=100
# (presumably measured in characters — confirm against the splitter's docs).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)

# Fetch every page selected by the sitemap loader and split it into chunks;
# `data` is the resulting list of chunked documents.
data = sitemap_loader.load_and_split(
    text_splitter=text_splitter,
)

Fetching pages: 100%|##########| 1288/1288 [19:22<00:00,  1.11it/s]


In [24]:
# Number of chunks produced by load_and_split (not the number of pages fetched).
len(data)

29997

In [25]:
from supabase.client import Client, create_client
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.supabase import SupabaseVectorStore
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if present) into the process environment.
load_dotenv()

# Supabase connection settings; either value is None when its variable is unset.
supabase_url, supabase_key = (
    os.environ.get(var_name)
    for var_name in ("SUPABASE_URL", "SUPABASE_SERVICE_KEY")
)
supabase: Client = create_client(supabase_url, supabase_key)

# Embedding model handle, constructed with its default settings.
embeddings = OpenAIEmbeddings()

In [33]:
# Index *all* chunks. A hard-coded slice such as data[27000:] covers only the
# tail of the corpus, so a fresh "Restart & Run All" would leave most of the
# documentation unindexed. Uploading in fixed-size batches keeps each
# embed/insert round small, so a failure part-way through costs one batch.
# NOTE(review): re-running this cell presumably inserts the same chunks again —
# clear the "documents" table first if that is the case.
UPLOAD_BATCH_SIZE = 1000

# The first batch creates the store handle.
# chunk_size is forwarded to the store unchanged (presumably the number of
# rows sent per insert request — confirm against the SupabaseVectorStore docs).
vector_store = SupabaseVectorStore.from_documents(
    documents=data[:UPLOAD_BATCH_SIZE],
    embedding=embeddings,
    client=supabase,
    table_name="documents",
    query_name="match_documents",
    chunk_size=100,
)

# Remaining batches are added through the same store object.
for batch_start in range(UPLOAD_BATCH_SIZE, len(data), UPLOAD_BATCH_SIZE):
    vector_store.add_documents(data[batch_start:batch_start + UPLOAD_BATCH_SIZE])

2023-11-05 21:21:11,683:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/documents "HTTP/1.1 201 Created"
2023-11-05 21:21:21,918:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/documents "HTTP/1.1 201 Created"
2023-11-05 21:21:31,638:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/documents "HTTP/1.1 201 Created"
2023-11-05 21:21:40,826:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/documents "HTTP/1.1 201 Created"
2023-11-05 21:21:50,043:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/documents "HTTP/1.1 201 Created"
2023-11-05 21:21:58,189:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/documents "HTTP/1.1 201 Created"
2023-11-05 21:22:05,740:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/documents "HTTP/1.1 201 Created"
2023-11-05 21:22:13,082:INFO - HTTP Request: POST https://qublpyarwoevdeqqic

In [34]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers import RePhraseQueryRetriever
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

# Conversation memory, exposed to the chain under the "chat_history" key as
# message objects (return_messages=True).
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Prompt that asks the LLM to reduce the user's question to a vector-store query.
DEFAULT_TEMPLATE = """You are an assistant tasked with taking a natural language \
query from a user and converting it into a query for a vectorstore. \
In this process, you strip out information that is not relevant for \
the retrieval task. Here is the user query: {question}"""

# Previously DEFAULT_TEMPLATE was defined but never handed to the retriever,
# so editing it had no effect; pass it explicitly as the re-phrasing prompt.
rephrase_prompt = PromptTemplate(
    input_variables=["question"], template=DEFAULT_TEMPLATE
)

# temperature=0 for the least random completions.
llm = ChatOpenAI(temperature=0)

# Wrap the vector-store retriever so each query is re-phrased by the LLM first.
retriever_from_llm = RePhraseQueryRetriever.from_llm(
    retriever=vector_store.as_retriever(), llm=llm, prompt=rephrase_prompt
)

# Conversational QA chain built from the LLM, the re-phrasing retriever and the memory.
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever_from_llm, memory=memory)


In [35]:
# Ask the chain one question; the returned dict (shown as the cell output)
# echoes the question and includes the accumulated chat history.
qa({ "question": "How do I setup Stripe sdk?"})

2023-11-05 21:28:01,761:INFO - Re-phrased question: Query for vectorstore: "setup Stripe sdk"
2023-11-05 21:28:06,573:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/rpc/match_documents?limit=4 "HTTP/1.1 200 OK"


{'question': 'How do I setup Stripe sdk?',
 'chat_history': [HumanMessage(content='How do I setup Stripe sdk?'),
  AIMessage(content='To set up the Stripe SDK, you will need to follow these steps:\n\n1. Sign up for a Stripe account at https://dashboard.stripe.com/register.\n2. Once you have signed up and logged in, go to the Stripe Dashboard.\n3. In the Dashboard, click on the "Developers" tab in the left-hand menu.\n4. Under the "Developers" tab, click on "API keys".\n5. You will see your API keys on this page. Take note of your "Publishable key" and "Secret key" as you will need them to configure the SDK.\n6. Install the Stripe SDK for your preferred programming language. You can find the SDKs and their documentation at https://stripe.com/docs/libraries.\n7. Follow the installation instructions provided in the documentation for your chosen SDK.\n8. Once the SDK is installed, you will need to configure it with your API keys. This typically involves setting the "Publishable key" and "S