In [1]:
import pandas as pd
import numpy as np
import os
import gradio as gr
from dotenv import load_dotenv

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub
from langchain.schema import AIMessage, HumanMessage, SystemMessage

from langchain_core.runnables import (
    ConfigurableField,
    RunnableBinding,
    RunnableLambda,
    RunnablePassthrough,
    RunnableParallel
)

pd.set_option('display.max_colwidth', 300)  # Or use a large number if 'None' does not work in some environments
pd.set_option('display.max_columns', 300)  # Show all columns
pd.set_option('display.max_rows', 20)  # Show all row

load_dotenv()
OPENAI_APIKEY = os.environ['OPENAI_APIKEY']

# Set up Chain

In [3]:
# Instantiate models
embeddings_model = OpenAIEmbeddings(api_key=OPENAI_APIKEY, model='text-embedding-3-large', max_retries=100, chunk_size=16, show_progress_bar=False)
chat_model_4 = ChatOpenAI(api_key=OPENAI_APIKEY, temperature=0.5, model='gpt-4-turbo-2024-04-09')
# Load chroma from disk
vectorstore = Chroma(persist_directory="../chroma_db", embedding_function=embeddings_model)
# Set up the vectorstore to be the retriever
num_results = 5
retriever = vectorstore.as_retriever(search_kwargs={'k': num_results})

In [4]:
def format_chat_history(chat_history):
    chat_history_langchain_format = []
    for human, ai in chat_history:
        chat_history_langchain_format.append(HumanMessage(content=human))
        chat_history_langchain_format.append(AIMessage(content=ai))
    
    return chat_history_langchain_format

In [5]:
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history", optional=True),
        ("human", "{question}"),
    ]
)

contextualize_chain = contextualize_q_prompt | chat_model_4 | StrOutputParser()

template = """You are an assistant for question-answering tasks regarding
plant-based nutrition to prevent and reverse disease and nutrition science. 
The following are snippets of transcripts from Dr Michael Greger's nutrition facts
videos that you are going to use to to answer the question.

If you don't know the answer, just say that you don't know.
Ask follow-up questions if appropriate. 

Context: {context}

Question: {contextualized_question}

Answer: 

"""

answer_prompt = ChatPromptTemplate.from_template(template)

qa_chain = answer_prompt | chat_model_4 | StrOutputParser()




In [6]:
chat_history = [["Is juice bad for me?", "It's better to avoid juice and drink water"],
                ["Should I drink filtered water?", "Filtered water is usually saafer"],
                ["How much water should I drink a day?", "5-7 cups"]]
question = "But how much for children?."

In [7]:
langchain_chat_history = format_chat_history(chat_history)
langchain_chat_history

[HumanMessage(content='Is juice bad for me?'),
 AIMessage(content="It's better to avoid juice and drink water"),
 HumanMessage(content='Should I drink filtered water?'),
 AIMessage(content='Filtered water is usually saafer'),
 HumanMessage(content='How much water should I drink a day?'),
 AIMessage(content='5-7 cups')]

In [8]:

contextualized_question = contextualize_chain.invoke({'chat_history': langchain_chat_history, 'question': question})
contextualized_question


'How much water should children drink per day?'

In [9]:
context = retriever.invoke(contextualized_question)
[c.metadata['title'] for c in context]

['What Is the Safest Metabolism Booster?',
 'What Is the Safest Metabolism Booster?',
 'How Many Glasses of Water Should We Drink a Day?',
 'How Many Glasses of Water Should We Drink a Day?',
 'Podcast: Boosting Our Metabolism']

In [10]:
answer = qa_chain.invoke({ 'context': context, 'contextualized_question': contextualized_question})
answer

"The recommended daily adequate intake of water for children varies by age. For children ages 4 through 8, it is recommended that they drink about 7 cups of water a day. For ages 9 through 13, the recommendation is 8 cups a day for girls and 10 cups a day for boys. These recommendations may offer more than just hydration benefits, as some research suggests possible increases in metabolic rate associated with water consumption. However, it's important to note that not all research teams were able to replicate these findings consistently."

In [11]:
def format_result(answer, context):
    # Extract unique pairs of titles and video IDs from the context
    unique_videos = set((doc.metadata['title'], doc.metadata['videoId']) for doc in context)
    
    # Create a plain text string where each title is followed by its URL
    titles_with_links = [
        f"{title}: https://www.youtube.com/watch?v={video_id}" for title, video_id in unique_videos
    ]
    
    # Join these entries with line breaks to form a clear list
    titles_string = '\n'.join(titles_with_links)
    titles_formatted = f"Relevant Videos:\n{titles_string}"
    
    # Combine the answer from the result with the formatted list of video links
    response = f"{answer}\n\n{titles_formatted}"

    return response

In [12]:
print(format_result(answer, context))

The recommended daily adequate intake of water for children varies by age. For children ages 4 through 8, it is recommended that they drink about 7 cups of water a day. For ages 9 through 13, the recommendation is 8 cups a day for girls and 10 cups a day for boys. These recommendations may offer more than just hydration benefits, as some research suggests possible increases in metabolic rate associated with water consumption. However, it's important to note that not all research teams were able to replicate these findings consistently.

Relevant Videos:
What Is the Safest Metabolism Booster?: https://www.youtube.com/watch?v=WGlENiyN4Dw
Podcast: Boosting Our Metabolism: https://www.youtube.com/watch?v=HyuXNzLzKYM
How Many Glasses of Water Should We Drink a Day?: https://www.youtube.com/watch?v=qrzSLIBOauM
