In [1]:
import getpass
import os

# Prompt for the OpenAI API key only when the environment does not already provide one,
# so the key is never hardcoded and a fresh kernel can still run the cells below.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")


In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate

In [3]:
# Source page and the CSS class of the element(s) to keep from it.
CARB_GUIDE_URL = "https://www.uhhospitals.org/services/clinical-nutrition-services/patient-resources/diet-information/carb-counting-nutrition-guide"
CONTENT_BLOCK_CLASS = "UH-Feature-GenericContent-ContentBlockFullWidth"

# The strainer is handed to BeautifulSoup as its `parse_only` filter, so only
# elements carrying the content-block class are parsed.
bs4_strainer = bs4.SoupStrainer(class_=CONTENT_BLOCK_CLASS)
loader = WebBaseLoader(
    web_paths=(CARB_GUIDE_URL,),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()

In [4]:
# Break the loaded page into small, overlapping chunks for embedding.
# NOTE(review): chunk_size/chunk_overlap are presumably measured in characters — confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=50,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

In [5]:
# Embed every chunk with OpenAI embeddings and index the result in a Chroma store.
# NOTE(review): re-running this cell in the same kernel may add the chunks to the
# store a second time — confirm, and see the disabled cleanup cell at the end.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

In [6]:
# Similarity-search retriever over the vector store, configured with k=5.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

In [7]:
# Chat model shared by both chains; temperature is set to 0.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

In [8]:
# Pull the RAG prompt from the LangChain hub by its owner/name reference.
RAG_PROMPT_REF = "rlm/rag-prompt"
prompt = hub.pull(RAG_PROMPT_REF)

In [9]:
# Display the prompt pulled from the hub to inspect its template and input variables.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [10]:
# Baseline prompt: the same instructions as the hub RAG prompt shown above, minus the
# retrieved-context slot, so the bare LLM can be compared against the RAG chain.
# `from_template` wraps the text in a single human message and infers the
# `question` input variable from the template itself.
llm_prompt = ChatPromptTemplate.from_template(
    "You are an assistant for question-answering tasks. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\nQuestion: {question} \nAnswer:"
)

In [11]:
# Display the context-free baseline prompt to confirm its template and input variable.
llm_prompt

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template="You are an assistant for question-answering tasks. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nAnswer:"))])

In [12]:
def format_docs(docs):
    """Join the ``page_content`` of each item in ``docs`` into one string.

    Consecutive contents are separated by a blank line (``"\\n\\n"``); an empty
    iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

In [13]:
# Fan the incoming question out: one branch retrieves chunks and formats them into
# the prompt's `context`, the other passes the question through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
# Fill the RAG prompt, call the chat model, and reduce the reply to a plain string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [14]:
# Baseline chain without retrieval: the question is passed straight into the
# context-free prompt, then to the same chat model and string parser.
llm_inputs = {"question": RunnablePassthrough()}
llm_chain = llm_inputs | llm_prompt | llm | StrOutputParser()

In [15]:
# Ask the baseline (no-retrieval) chain; the answer is displayed as the cell output.
question = "How many carbs are in a meal of chicken nuggets?"
llm_chain.invoke(question)

"I don't know the exact number of carbs in a meal of chicken nuggets as it can vary depending on the brand and serving size. It's best to check the nutrition label or contact the manufacturer for accurate information."

In [16]:
# Ask the retrieval-augmented chain the same question for comparison with the baseline.
question = "How many carbs are in a meal of chicken nuggets?"
rag_chain.invoke(question)

'A meal of chicken nuggets contains 15 grams of carbs.'

In [17]:
# Optional cleanup, left disabled: uncomment to call delete_collection() on the vector store.
# vectorstore.delete_collection()