In [1]:
!pip install unstructured > /dev/null 

In [6]:
!pip install -Uq markdown langchain openai tiktoken supabase python-dotenv

In [32]:
from langchain.document_loaders import UnstructuredMarkdownLoader
import glob
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import MarkdownHeaderTextSplitter

# Recursive glob pattern: every .mdx file under ./docs (relative to the current working
# directory), including nested sub-directories.
docs_path = './docs/**/*.mdx'

# NOTE: glob returns paths in arbitrary (filesystem) order, so the order of `docs` — and
# therefore any positional slicing of it later on — can differ between machines.
mdx_files = glob.glob(docs_path, recursive=True)

# Accumulates the final chunks of every file, in the order the files are visited.
docs = []

# Second-stage splitter: caps each chunk at chunk_size=1000 with chunk_overlap=200 between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

# First-stage splitter: cuts the markdown at #, ## and ### headings and stores the heading
# text in each piece's metadata under "Header 1" / "Header 2" / "Header 3".
# Both objects are loop-invariant, so they are built once rather than once per file.
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

for file_path in mdx_files:
    print(file_path)

    # Only the read needs the file handle; close it before the splitting work starts.
    with open(file_path, 'r', encoding='utf-8') as file:
        mdx_content = file.read()

    # Stage 1: split on headings. Stage 2: break oversized sections into bounded chunks.
    md_header_splits = markdown_splitter.split_text(mdx_content)
    splits = text_splitter.split_documents(md_header_splits)

    # Append in place instead of rebuilding the whole list on every iteration.
    docs.extend(splits)

# Total number of chunks produced (displayed as the cell's output).
len(docs)

./docs/stroke-width.mdx
./docs/columns.mdx
./docs/transition-duration.mdx
./docs/grid-column.mdx
./docs/backdrop-brightness.mdx
./docs/scroll-snap-type.mdx
./docs/configuration.mdx
./docs/pointer-events.mdx
./docs/padding.mdx
./docs/box-sizing.mdx
./docs/translate.mdx
./docs/divide-style.mdx
./docs/drop-shadow.mdx
./docs/content.mdx
./docs/outline-width.mdx
./docs/transition-delay.mdx
./docs/plugins.mdx
./docs/ring-offset-color.mdx
./docs/user-select.mdx
./docs/flex-basis.mdx
./docs/hue-rotate.mdx
./docs/theme.mdx
./docs/caret-color.mdx
./docs/grid-row.mdx
./docs/transform-origin.mdx
./docs/background-image.mdx
./docs/justify-content.mdx
./docs/transition-timing-function.mdx
./docs/animation.mdx
./docs/grayscale.mdx
./docs/grid-auto-rows.mdx
./docs/box-decoration-break.mdx
./docs/visibility.mdx
./docs/max-width.mdx
./docs/rotate.mdx
./docs/grid-template-columns.mdx
./docs/blur.mdx
./docs/max-height.mdx
./docs/upgrade-guide.mdx
./docs/scroll-snap-stop.mdx
./docs/ring-width.mdx
./docs/to

1960

In [33]:
from supabase.client import Client, create_client
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.supabase import SupabaseVectorStore
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail early, naming exactly which setting is absent, instead of handing None to
# create_client and getting a less specific error from inside the Supabase client.
_missing_env = [
    name for name in ("SUPABASE_URL", "SUPABASE_SERVICE_KEY") if not os.environ.get(name)
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

supabase_url = os.environ["SUPABASE_URL"]
supabase_key = os.environ["SUPABASE_SERVICE_KEY"]
supabase: Client = create_client(supabase_url, supabase_key)

# Embedding model used to vectorise the document chunks.
# NOTE(review): presumably reads the OpenAI API key from the environment — confirm.
embeddings = OpenAIEmbeddings()

# Spot-check one chunk (page_content + header metadata) before uploading anything.
docs[10]

Document(page_content='<div class="absolute inset-0 ring-1 ring-inset ring-black/10 rounded-lg"></div>\n</div>\n<div class="hidden sm:block relative aspect-w-1 aspect-h-1 mt-8 sm:mt-0">\n<img class="w-full object-cover rounded-lg" src="https://images.unsplash.com/photo-1463288889890-a56b2853c40f?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=3132&q=80" />\n<div class="absolute inset-0 ring-1 ring-inset ring-black/10 rounded-lg"></div>\n</div>\n<div class="hidden sm:block relative aspect-w-16 aspect-h-9 mt-8">\n<img class="w-full object-cover rounded-lg" src="https://images.unsplash.com/photo-1611605645802-c21be743c321?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=2940&q=80" />\n<div class="absolute inset-0 ring-1 ring-inset ring-black/10 rounded-lg"></div>\n</div>\n<div class="hidden sm:block relative aspect-w-1 aspect-h-1 mt-8">', metadata={'Header 2': 'Basic usage', 'Header 3': 'Adding based on column 

In [39]:
# Index of the first chunk to embed and upload; chunks before it are skipped entirely.
# NOTE(review): 600 presumably resumes an earlier, partial upload — confirm. Set this to 0
# when populating an empty table, otherwise docs[0:600] are never indexed.
UPLOAD_START_INDEX = 600

# Embeds the selected chunks with `embeddings` and inserts them into the Supabase table;
# `query_name` is the database function later used for similarity search.
vector_store = SupabaseVectorStore.from_documents(
    documents=docs[UPLOAD_START_INDEX:],
    embedding=embeddings,
    client=supabase,
    table_name="tailwind_documents",
    query_name="match_tailwind_documents",
    # table_name="documents",
    # query_name="match_documents",
    chunk_size=100,  # upload batch size (rows per insert request), not the text-chunk length
    # show_progress=True
)

# from typing import List

# chunk_size = 500  # You can modify this value to a smaller number
# id_list: List[str] = []
# for i in range(0, len(docs), chunk_size):
#     chunk = docs[i : i + chunk_size]

#     result = supabase.from_("tailwind_documents").upsert(chunk).execute()  # type: ignore

#     if len(result.data) == 0:
#         raise Exception("Error inserting: No rows added")

#     # VectorStore.add_vectors returns ids as strings
#     ids = [str(i.get("id")) for i in result.data if i.get("id")]

#     id_list.extend(ids)

2023-11-06 15:03:54,723:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/tailwind_documents "HTTP/1.1 201 Created"
2023-11-06 15:04:01,591:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/tailwind_documents "HTTP/1.1 201 Created"
2023-11-06 15:04:08,235:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/tailwind_documents "HTTP/1.1 201 Created"
2023-11-06 15:04:14,286:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/tailwind_documents "HTTP/1.1 201 Created"
2023-11-06 15:04:20,144:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/tailwind_documents "HTTP/1.1 201 Created"
2023-11-06 15:04:24,609:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/tailwind_documents "HTTP/1.1 201 Created"
2023-11-06 15:04:28,851:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/tailwind_documents "HTTP/1.1 201 Created"
2023-11-06 15

In [46]:
from langchain.chat_models import ChatOpenAI
# from langchain.chat_models import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers import RePhraseQueryRetriever
from langchain.memory import ConversationBufferMemory

# Chat model with temperature 0, shared by the query rephraser and the QA chain.
llm = ChatOpenAI(temperature=0)

# Conversation history, exposed to the chain under "chat_history" as message objects.
# output_key='answer' selects which chain output is written to memory (the chain below
# also returns source documents).
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key='answer',
)

# Prompt text for rewriting a user question into a vector-store query.
# Note: nothing in this cell passes it to the rephrasing retriever.
DEFAULT_TEMPLATE = (
    "You are an assistant tasked with taking a natural language "
    "query from a user and converting it into a query for a vectorstore. "
    "In this process, you strip out information that is not relevant for "
    "the retrieval task. Here is the user query: {question}"
)

# Retriever that has the LLM rephrase the question before searching the vector store.
# It is constructed here, but the `qa` chain below does not use it.
retriever_from_llm = RePhraseQueryRetriever.from_llm(
    retriever=vector_store.as_retriever(),
    llm=llm,
)
# docs = retriever_from_llm.get_relevant_documents("How do I load documents from Hacker News?")

# Conversational QA chain over the plain vector-store retriever; returns the answer
# together with the source documents it was based on.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vector_store.as_retriever(),
    memory=memory,
    return_source_documents=True,
)

# qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever_from_llm, memory=memory)

In [47]:
# Ask one question; the returned dict (question, chat_history, answer, sources) is the
# cell's displayed output.
qa(
    {"question": "How do I make a red background on my div?"}
)

2023-11-06 15:12:12,002:INFO - HTTP Request: POST https://qublpyarwoevdeqqicbz.supabase.co/rest/v1/rpc/match_tailwind_documents?limit=4 "HTTP/1.1 200 OK"


{'question': 'How do I make a red background on my div?',
 'chat_history': [HumanMessage(content='How do I make a red background on my div?'),
  AIMessage(content='To make a red background on your div, you can use the `bg-red-500` utility class. Here\'s an example:\n\n```html\n<div class="bg-red-500">\n  <!-- Your content here -->\n</div>\n```\n\nThis will apply a red background color to your div. You can also customize the shade of red by using different shades from the color palette, such as `bg-red-600` for a darker shade or `bg-red-400` for a lighter shade.')],
 'answer': 'To make a red background on your div, you can use the `bg-red-500` utility class. Here\'s an example:\n\n```html\n<div class="bg-red-500">\n  <!-- Your content here -->\n</div>\n```\n\nThis will apply a red background color to your div. You can also customize the shade of red by using different shades from the color palette, such as `bg-red-600` for a darker shade or `bg-red-400` for a lighter shade.',
 'source_d