In [20]:
import pandas as pd
import numpy as np
import os
import gradio as gr
from dotenv import load_dotenv


from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import FAISS, Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub



from langchain_core.runnables import (
    ConfigurableField,
    RunnableBinding,
    RunnableLambda,
    RunnablePassthrough,
    RunnableParallel
)

# Load environment variables via python-dotenv, then read the OpenAI key.
# Indexing os.environ raises KeyError if OPENAI_APIKEY is not set.
load_dotenv()
OPENAI_APIKEY = os.environ["OPENAI_APIKEY"]

# pandas display limits used when DataFrames are rendered in this notebook.
pd.set_option("display.max_rows", 20)  # cap the number of rows displayed at 20
pd.set_option("display.max_columns", 300)  # cap the number of columns displayed at 300
pd.set_option("display.max_colwidth", 300)  # cap the displayed width of a cell at 300 characters

# Set up RAG Chain

In [11]:
# Instantiate models
# Embedding model handed to Chroma below as its embedding function
# (presumably the same model the persisted index was built with — TODO confirm).
embeddings_model = OpenAIEmbeddings(api_key=OPENAI_APIKEY, model='text-embedding-3-large', max_retries=100, chunk_size=16, show_progress_bar=False)
# Chat model used as the generation step of the RAG chain
chat_model_4 = ChatOpenAI(api_key=OPENAI_APIKEY, temperature=0.5, model='gpt-4-turbo-2024-04-09')
# Load the persisted Chroma collection from disk
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)
# Set up the vectorstore to be the retriever, returning the top 5 matches.
# The result count must be passed through `search_kwargs`: a bare `k=5` keyword
# is not a retriever field and is silently dropped, which leaves the default
# result count in effect instead of 5.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
# Get pre-written rag prompt
prompt = hub.pull("rlm/rag-prompt")
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined in order, separated by one blank
        line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = [doc.page_content for doc in docs]
    return separator.join(page_texts)
# Assemble the RAG chain stage by stage.
# Stage 1: build the prompt inputs — "context" is the retriever output passed
# through format_docs, "question" is the chain input forwarded unchanged.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
# Stage 2: fill the prompt, call the chat model, and parse its reply to a plain string.
rag_chain = rag_inputs | prompt | chat_model_4 | StrOutputParser()

# Test the RAG Chain

In [16]:
# Display the prompt pulled from the hub to inspect its template text and input variables.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [13]:
# Smoke-test the whole chain end to end with one sample question
rag_chain.invoke(input="Diabetes and eggs")

'Eating eggs has been associated with an increased risk of developing type 2 diabetes. Studies indicate that consuming as little as one egg per week can significantly increase the likelihood of diabetes, with higher consumption leading to even greater risks. This association has been confirmed across various studies and populations, suggesting that high egg intake before and during pregnancy can also increase the risk of gestational diabetes.'

In [15]:
# Query the vector store directly (no LLM involved) to inspect the raw retrieved documents
docs = vectorstore.similarity_search(query="eggs and diabetes", k=5)
# Show the first of the returned documents
docs[0]

Document(page_content='"Eggs and Diabetes" Type 2 diabetes is becoming  a world pandemic. We know the consumption  of eggs is related to the development of some  other chronic diseases... what about diabetes? Researchers found a stepwise  increase in risk the more and more  eggs people ate. Eating just a single  egg a week appeared to increase the odds  of diabetes by 76%. Two eggs a week appeared  to double the odds, and just a single egg  a day tripled the odds. Three times greater  risk of type 2 diabetes, one of the leading causes  of death and amputation, blindness, and kidney failure. This is not the  first time a link between eggs and diabetes  has been reported. In 2009, Harvard researchers found  that a single egg a day or more was associated with  an increased risk of type 2 diabetes  in men and women, and that finding has since  also been confirmed in other populations:  Asia in 2011 and Europe in 2012. And the "high" consumption of eggs associated with diabetes risk  was le

In [17]:
def generate_answer(message, history):
    """Answer a single chat message by running it through the RAG chain.

    Args:
        message: The user's latest message; passed unchanged to
            ``rag_chain.invoke``.
        history: Earlier conversation turns. Accepted but not used, so each
            question is answered without conversational context.

    Returns:
        Whatever ``rag_chain.invoke`` returns for ``message``.
    """
    return rag_chain.invoke(message)

In [21]:
# Build the chat UI around generate_answer and serve it locally.
gr.ChatInterface(
    generate_answer,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question about nutrition and health", container=False, scale=7),
    title="Nutrition Facts ChatBot",
    # Name corrected from "McGregor": the Nutrition Facts videos are by Dr Michael Greger.
    description="Ask Dr Michael Greger's Nutrition Facts videos any questions!",
    theme="soft",
    examples=["diverticulosis", "heart disease", "low carb diets"],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
# NOTE(review): with debug=True this cell appears to keep running until it is
# interrupted (the recorded output ends with a keyboard interruption) — keep it
# as the last cell of the notebook.
).launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


