# About
- Build a basic RAG chain
- Build a RAG chain that also returns its sources

# Imports and Settings

In [2]:
import os
import gradio as gr

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain_core.runnables import RunnablePassthrough

from dotenv import load_dotenv
# Load variables from a .env file into the process environment before reading the key.
load_dotenv()

# Fail fast with an actionable message when the key is missing or empty, instead of
# a bare KeyError here or an authentication error on the first API call.
OPENAI_APIKEY = os.environ.get('OPENAI_APIKEY')
if not OPENAI_APIKEY:
    raise KeyError("OPENAI_APIKEY is not set; define it in the environment or in a .env file")

# Create Basic RAG Chain

In [46]:
# Embeddings model handed to the vector store as its embedding function
embeddings_model = OpenAIEmbeddings(
    api_key=OPENAI_APIKEY,
    model='text-embedding-3-large',
    max_retries=100,
    chunk_size=16,
    show_progress_bar=False,
)

# Chat model that sits at the answering step of the RAG chain
chat_model_4 = ChatOpenAI(
    api_key=OPENAI_APIKEY,
    model='gpt-4-turbo-2024-04-09',
    temperature=0.5,
)


In [50]:
# Open the Chroma store persisted on disk, using the embeddings model defined above
vectorstore = Chroma(
    embedding_function=embeddings_model,
    persist_directory="../chroma_db/",
)

# Wrap the vector store as a retriever; k is forwarded as a search kwarg
k = 5
retriever = vectorstore.as_retriever(search_kwargs=dict(k=k))

# Format docs function
def format_docs(docs):
    """Concatenate the text of the given documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line between
        consecutive documents (an empty string when ``docs`` is empty).
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

## Test out the retriever

In [51]:
# Test out the retriever: run a sample query and keep the returned documents
# in `results` so the following cells can inspect how they are formatted.
query = "eggs and diabetes"
results = retriever.invoke(query)

In [53]:
# How would the formatted documents be rendered in the query?
# Print the retrieved documents as joined by format_docs to inspect the text
# that would be supplied as context.
print(format_docs(results))

"Eggs and Diabetes" Type 2 diabetes is becoming  a world pandemic. We know the consumption  of eggs is related to the development of some  other chronic diseases... what about diabetes? Researchers found a stepwise  increase in risk the more and more  eggs people ate. Eating just a single  egg a week appeared to increase the odds  of diabetes by 76%. Two eggs a week appeared  to double the odds, and just a single egg  a day tripled the odds. Three times greater  risk of type 2 diabetes, one of the leading causes  of death and amputation, blindness, and kidney failure. This is not the  first time a link between eggs and diabetes  has been reported. In 2009, Harvard researchers found  that a single egg a day or more was associated with  an increased risk of type 2 diabetes  in men and women, and that finding has since  also been confirmed in other populations:  Asia in 2011 and Europe in 2012. And the "high" consumption of eggs associated with diabetes risk  was less than one a day,

and

It would be nice to include each document's title as well!

In [55]:
# Format docs function with the addition of the title
def format_docs(docs):
    """Join documents into one context string, each preceded by its title.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and
            ``metadata`` (dict) attributes.

    Returns:
        A single string in which each document is rendered as its title, a
        newline, then its page content, with a blank line between documents.
        A document whose metadata has no ``'title'`` entry (or an empty one)
        is rendered as its page content alone instead of raising ``KeyError``.
    """
    def render(doc):
        # .get() keeps one untitled document from breaking the whole context.
        title = doc.metadata.get('title')
        if not title:
            return doc.page_content
        return title + '\n' + doc.page_content

    return "\n\n".join(render(doc) for doc in docs)

In [56]:
# Re-render the same retrieved documents now that format_docs prepends the title
print(format_docs(results))

Eggs and Diabetes
"Eggs and Diabetes" Type 2 diabetes is becoming  a world pandemic. We know the consumption  of eggs is related to the development of some  other chronic diseases... what about diabetes? Researchers found a stepwise  increase in risk the more and more  eggs people ate. Eating just a single  egg a week appeared to increase the odds  of diabetes by 76%. Two eggs a week appeared  to double the odds, and just a single egg  a day tripled the odds. Three times greater  risk of type 2 diabetes, one of the leading causes  of death and amputation, blindness, and kidney failure. This is not the  first time a link between eggs and diabetes  has been reported. In 2009, Harvard researchers found  that a single egg a day or more was associated with  an increased risk of type 2 diabetes  in men and women, and that finding has since  also been confirmed in other populations:  Asia in 2011 and Europe in 2012. And the "high" consumption of eggs associated with diabetes risk  was less th

In [59]:
# Get the prompt from the langchain hub. Or you could write your own!
# NOTE(review): no commit is pinned here, so this presumably fetches whatever
# version is currently published — consider pinning the commit for
# reproducibility; confirm the syntax against the hub documentation.
prompt = hub.pull("rlm/rag-prompt")

In [60]:
# Display the pulled prompt template to inspect its input variables and wording
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

## Test out the RAG_Chain

In [61]:
# Create RAG Chain
# Step 1: map the incoming question onto the prompt's two inputs — the retrieved
# documents run through format_docs as "context", and the question itself,
# passed through unchanged, as "question".
prompt_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Step 2: fill the prompt, call the chat model, and parse the reply into a plain string.
rag_chain = prompt_inputs | prompt | chat_model_4 | StrOutputParser()

In [62]:
# Ask the chain the same sample question used for the retriever test;
# the generated answer is shown as the cell's output.
query = "eggs and diabetes"
rag_chain.invoke(query)

'Research indicates that higher consumption of eggs is associated with an increased risk of type 2 diabetes. Studies have shown that even consuming less than one egg per day can elevate the risk, and the risk increases progressively with the number of eggs consumed. Additionally, for individuals with diabetes, consuming eggs may accelerate mortality and other health complications.'

# Create a simple Gradio answer bot

In [65]:
# Function to generate answer
def generate_answer(message, history):
    """Answer a chat message by running it through the RAG chain.

    Args:
        message: The user's latest message, forwarded unchanged to the chain.
        history: Earlier conversation turns; accepted but not used, so each
            question is answered independently of the previous ones.

    Returns:
        Whatever ``rag_chain.invoke`` returns for ``message``.
    """
    answer = rag_chain.invoke(message)
    return answer

In [66]:
# Set up chat bot interface: a Gradio chat UI whose replies come from generate_answer.
answer_bot = gr.ChatInterface(
    generate_answer,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(
        placeholder="Ask me a question about nutrition and health",
        container=False,
        scale=7,
    ),
    title="Nutrition Facts ChatBot",
    description="Ask Dr. Michael Greger's Nutrition Facts videos any questions!",
    theme="soft",
    examples=["diverticulosis", "heart disease", "low carb diets", "diabetes", "green tea"],
    cache_examples=False,
    # NOTE(review): retry_btn / undo_btn / clear_btn appear to be Gradio 4.x
    # ChatInterface arguments (None = no button) — confirm they are still
    # accepted by the installed Gradio version.
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
    submit_btn="Ask",
)

In [67]:

# Start the Gradio app; the local URL it is served on is printed in the cell output.
answer_bot.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


