 ## Load environment variables

In [None]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before any
# client below is constructed.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm
# that the .env file defines it.
load_dotenv() 

## Load our dataset

In [1]:
from langchain_community.document_loaders import JSONLoader

# Configure the loader separately from running it so each step is visible.
# The jq expression '.[]' yields one document per element of the top-level
# JSON value; text_content=False lets those elements be non-string values.
dataset_loader = JSONLoader(
    file_path='./dataset.json',
    jq_schema='.[]',
    text_content=False,
)
docs = dataset_loader.load()

## Seed the Chroma DB

In [3]:
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
import chromadb
from chromadb.config import Settings

# Connect to the Chroma server expected on localhost:8000.
client = chromadb.HttpClient(host="localhost", port=8000, settings=Settings(allow_reset=True))
# Make sure the "demo" collection exists before documents are written to it.
collection = client.get_or_create_collection("demo")
embeddings_function = OpenAIEmbeddings()

# from_documents is a classmethod, so call it on the class itself. The previous
# `Chroma().from_documents(...)` first built a throwaway vector store with the
# default client and default collection, only to discard it immediately.
# NOTE(review): no explicit ids are passed, so re-running this cell presumably
# adds the same documents again — confirm, or pass stable ids.
# Left as the last expression so the notebook still displays the created store.
Chroma.from_documents(
    documents=docs,
    embedding=embeddings_function,
    client=client,
    collection_name="demo",
)

<langchain_chroma.vectorstores.Chroma at 0x11ff95750>

In [4]:
from langchain_chroma import Chroma
import chromadb
from chromadb.config import Settings
from langchain_openai import OpenAIEmbeddings

# Re-create the HTTP client so this cell can be run without the seeding cell.
client = chromadb.HttpClient(
    host="localhost",
    port=8000,
    settings=Settings(allow_reset=True),
)

# LangChain vector-store wrapper over the "demo" collection; the embedding
# function is what turns incoming queries into vectors.
db = Chroma(
    collection_name="demo",
    embedding_function=OpenAIEmbeddings(),
    client=client,
)

# Sanity check: fails if the "demo" collection is missing on the server.
print(client.get_collection("demo"))

name='demo' id=UUID('076b2f76-68c6-4bd0-933e-41dfce297358') metadata=None tenant=None database=None


In [6]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# The question that is sent through the RAG chain at the end of this cell.
query = "What is this blog about?"

# Chat model that writes the final answer from the filled-in prompt.
llm = ChatOpenAI(model='gpt-4-turbo', temperature=0, verbose=True)

# Expose the vector store as a retriever (default search settings) so it can be
# composed into the chain and supply the context snippets for each question.
retriever = db.as_retriever()
# The prompt is defined inline here rather than pulled from the LangChain hub
# ("rlm/rag-prompt"). It has two placeholders, {question} and {context}.
prompt = PromptTemplate.from_template("""
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Try to keep the answer concise. If the answer has code, ensure you provide the code in a markdown code block format. Add the source URLs provided in the context at the end of the answer. 
Question: {question} 
Context: {context} 
Answer:
""")

def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# Map the incoming question onto the two prompt variables: "context" is the
# retrieved documents flattened to text by format_docs, and "question" is the
# input passed through unchanged.
prompt_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Pipeline: build prompt inputs -> fill the prompt -> call the model -> plain string.
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

# Run the chain for the query and show the resulting answer in the notebook.
answer = rag_chain.invoke(query)
display(answer)

'The blog is about food, specifically focusing on topics like making pancakes.\n\nSources:\n- https://demo.html/b\n- https://demo.html/a'