In [8]:
import os                                                                       # For accessing environment variables
from dotenv import load_dotenv                                                  # For loading environment variables from a .env file
import bs4                                                                      # BeautifulSoup, for web scraping and parsing HTML/XML documents

from langchain_community.document_loaders import WebBaseLoader                  # Data Ingestion: Loading web content
from langchain_text_splitters import RecursiveCharacterTextSplitter             # Data Transformation: Splitting text into chunks
from langchain.embeddings import OllamaEmbeddings                               # Embedding: Converting text into vector representations
from langchain_chroma import Chroma                                             # Vector Store: Storing and retrieving vectors
from langchain_core.prompts import ChatPromptTemplate                           # Prompt Template: Structuring prompts for the LLM
from langchain_groq import ChatGroq                                             # LLM: The large language model for generating responses
from langchain.chains import create_retrieval_chain                             # Retrieval Chain: Combining retriever and LLM for Q&A
from langchain.chains.combine_documents import create_stuff_documents_chain     # Combining documents for processing
from langchain_core.messages import AIMessage, HumanMessage                     # Message structures for conversation
from langchain_community.chat_message_histories import ChatMessageHistory       # For maintaining chat histories
from langchain_core.chat_history import BaseChatMessageHistory                  # Base class for chat history
from langchain_core.runnables.history import RunnableWithMessageHistory         # For executing tasks with chat history
from langchain.chains import create_history_aware_retriever                     # Creating retrievers that are aware of history
from langchain_core.prompts import MessagesPlaceholder                          # Placeholder for messages in prompts

# Load key/value pairs from a local .env file into the process environment
# (GROQ_API_KEY is read from the environment further down via os.getenv).
load_dotenv()


True

In [2]:
# 1. Data Ingestion
#    Download the blog post that serves as the knowledge source.

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    # Hand BeautifulSoup a strainer so that only elements carrying these
    # CSS classes are parsed.
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)
documents = loader.load()

In [3]:
# 2. Data Transformation
#    Break the loaded documents into overlapping chunks.

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # maximum size of a single chunk
    chunk_overlap=200,  # overlap kept between consecutive chunks
)
chunks = text_splitter.split_documents(documents)

In [4]:
# 3. Embedding
#    Configure the Ollama model that will turn text into vectors.
#    NOTE(review): "gemma2:2b" looks like a general-purpose chat model rather
#    than a dedicated embedding model -- confirm retrieval quality is adequate.

embeddings = OllamaEmbeddings(
    model="gemma2:2b",
)

In [5]:
# 4. Vector Database
#    Embed every chunk with `embeddings` and index the resulting vectors in Chroma.

vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [6]:
# 5. Retriever
#    Expose the vector store through the retriever interface, using the
#    defaults of as_retriever().

retriever = vector_store.as_retriever()
retriever  # displayed for inspection

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x107881720>)

In [7]:
# 6. LLM Initialisation
#    Build the Groq-hosted chat model. The key is read from the environment
#    (populated from .env by load_dotenv()) so it never appears in the notebook.

groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    # os.getenv returns None when the variable is missing (and "" when it is
    # set but empty). Stop here with an actionable message instead of letting
    # a less obvious client/authentication error surface later.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")

In [11]:
# 7. Contextualized Prompt
#    Instructs the LLM to rewrite a follow-up question into a standalone one,
#    so it can be understood without the preceding conversation.

contextualized_q_system_prompt = (
    "Given a chat history and the latest user question which might "
    "reference context in the chat history, formulate a standalone "
    "question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and "
    "otherwise return it as is."
)

# Message order: system instructions, prior turns, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualized_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

In [12]:
# 8. Question Answer Prompt
#    System instructions with a {context} slot, followed by the chat history
#    and the latest user question.

# The literal below is sent to the model as-is (including its indentation),
# so its content is intentionally left untouched.
system_prompt = """
        You are an assistant for question-answering tasks.
        Use the following pieces of retrieved context to answer
        the question. If you don't know the answer, say that you 
        don't know. Use three sentences at maximum and keep the
        answer concise.
        \n\n
        {context}
    """

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

In [13]:
# 9. History Aware Retriever Chain
#    Combines the LLM, the retriever and the contextualizing prompt so that
#    retrieval can take earlier turns of the conversation into account.

history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)
history_aware_retriever

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x107881720>))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if neede

In [14]:
# 10. Question Answer Chain
#     First build the chain that answers from a set of documents, then put
#     the history-aware retriever in front of it.

question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [15]:
# 11. The Full Conversational Chain With Session History

# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation.

    Returns:
        The history object held in ``store`` under ``session_id``.
    """
    history = store.get(session_id)
    if history is None:
        history = ChatMessageHistory()
        store[session_id] = history
    return history


# Wrap the RAG chain so each call reads and extends the history of its session.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    output_messages_key="answer",
    history_messages_key="chat_history",
    input_messages_key="input",
)

In [16]:
# Session "abc123", first question.
conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'Task decomposition is a process in which a complex task is broken down into smaller, more manageable subtasks.'

In [17]:
# Session "abc123", follow-up: "it" refers back to the previous question,
# which is available through the session's stored history.
conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={
        "configurable": {"session_id": "abc123"},
    },
)["answer"]

'One common way of task decomposition is through the use of chain of thought (CoT) prompting, where a model is instructed to "think step by step" to decompose a hard task into smaller and simpler steps.'

In [18]:
# A new session
conversational_rag_chain.invoke(
    {"input": "What is Maximum Inner Product Search (MIPS)?"},
    config={"configurable": {"session_id": "def123"}},
)["answer"]

"I don't know."

In [19]:
# A new session
conversational_rag_chain.invoke(
    {"input": "What is Chain of Thought?"},
    config={"configurable": {"session_id": "ghi123"}},
)["answer"]

'According to the provided context, Chain of Thought (CoT) is a prompting technique used to enhance model performance on complex tasks. It instructs the model to "think step by step" to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and sheds light on the model\'s thinking process.'

In [20]:
# Display the per-session chat histories collected in `store`.
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Task Decomposition?'), AIMessage(content='Task decomposition is a process in which a complex task is broken down into smaller, more manageable subtasks.'), HumanMessage(content='What are common ways of doing it?'), AIMessage(content='One common way of task decomposition is through the use of chain of thought (CoT) prompting, where a model is instructed to "think step by step" to decompose a hard task into smaller and simpler steps.')]),
 'def123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Maximum Inner Product Search (MIPS)?'), AIMessage(content="I don't know.")]),
 'ghi123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Chain of Thought?'), AIMessage(content='According to the provided context, Chain of Thought (CoT) is a prompting technique used to enhance model performance on complex tasks. It instructs the model to "think step by step" to utilize more test-time

In [21]:
# Three separate sessions were created; each session's history is stored under its own ID and passed back in every time that session is used.