[Reference: how to build RAG with chat history](https://python.langchain.com/v0.2/docs/tutorials/qa_chat_history/#chains)

[MongoDBChatMessageHistory: latest API reference](https://langchain-mongodb.readthedocs.io/en/latest/langchain_mongodb/chat_message_histories/langchain_mongodb.chat_message_histories.MongoDBChatMessageHistory.html#langchain_mongodb.chat_message_histories.MongoDBChatMessageHistory)

Import all libraries here

In [16]:
import bs4
from langchain import hub

from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_mongodb import MongoDBAtlasVectorSearch
from pymongo import MongoClient
from pymongo.operations import SearchIndexModel

from langchain_core.documents import Document
import logging
import os
from dotenv import load_dotenv, find_dotenv

from uuid import uuid4
from bson.objectid import ObjectId

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import AIMessage, HumanMessage

from langchain_google_genai import ChatGoogleGenerativeAI




Create the MongoDB vector store and search index

In [20]:
# Load the .env file located by find_dotenv(); override=True lets its values
# replace variables that are already set in the process environment.
load_dotenv(find_dotenv(), override=True)
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Embedding model used for both indexing and querying.
gemini_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004", google_api_key=GOOGLE_API_KEY
)

# MongoDB connection plus the names of the database, collection and search index.
MONGODB_ATLAS_CLUSTER_URI = os.environ.get("MONGODB_ATLAS_CLUSTER_URI")
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)
DB_NAME = "RAG-Chatbot-Cluster"
COLLECTION_NAME = "RAG-Chatbot-Collection-Test"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "RAG-Chatbot-Index-Test"

MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

# Vector store backed by the collection above, scored with cosine similarity.
vector_store = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=gemini_embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn="cosine",
)

def initialize_vector_store():
    """Prepare the MongoDB collection and smoke-test the vector store.

    Steps, in order:
      1. Check the connection with ``client.server_info()``.
      2. Create ``COLLECTION_NAME`` in ``DB_NAME`` when it does not exist yet.
      3. Call ``create_index()`` to create the search index.
      4. Insert a probe document through ``vector_store``, read it back,
         delete it, and confirm that it is gone.

    Each step is logged. The probe carries a one-off marker so that the
    lookups and the delete can only ever touch the probe itself.

    Raises:
        Exception: any error raised by the steps above is logged and re-raised.
    """
    try:
        # Verify MongoDB connection
        client.server_info()  # Raises an exception if connection fails
        logging.info("MongoDB connection established successfully")

        # Create the collection only when it is missing
        if COLLECTION_NAME not in client[DB_NAME].list_collection_names():
            client[DB_NAME].create_collection(COLLECTION_NAME)
            logging.info(f"Created collection {COLLECTION_NAME}")
        else:
            logging.info(f"Collection {COLLECTION_NAME} already exists")

        logging.info(f"Ensure vector search index '{ATLAS_VECTOR_SEARCH_INDEX_NAME}' is configured in MongoDB Atlas for collection {COLLECTION_NAME}")
        create_index()

        # Real chunks in this notebook are also stored with file_id 0, so a
        # filter on file_id alone could match (and delete) one of them.  The
        # random marker makes the filter specific to this probe document.
        # The same filter is reused for every lookup so the checks cannot drift
        # apart (the deletion check used to query ".file_id", which never matched).
        probe_filter = {"file_id": 0, "init_probe": uuid4().hex}

        # Test vector store by adding a dummy document
        dummy_doc = Document(page_content="Test document", metadata=dict(probe_filter))
        vector_store.add_documents([dummy_doc])
        logging.info("Added test document to vector store")

        # Log the inserted document to inspect its structure
        inserted_doc = vector_store._collection.find_one(probe_filter)
        if inserted_doc:
            logging.info(f"Inserted test document: {inserted_doc.get('file_id')}")
        else:
            logging.error("Test document not found after insertion")

        # Delete the test document
        result = vector_store._collection.delete_one(probe_filter)
        if result.deleted_count > 0:
            logging.info("Successfully deleted test document")
        else:
            logging.warning("No test document was deleted; check document structure or query")

        # Verify deletion
        remaining_doc = vector_store._collection.find_one(probe_filter)
        if remaining_doc:
            logging.error(f"Test document still exists after deletion attempt: {remaining_doc}")
        else:
            logging.info("Confirmed test document was deleted")

    except Exception as e:
        logging.error(f"Failed to initialize vector store: {str(e)}")
        raise

def create_index():
    """Create the search index used by ``vector_store`` if it does not exist.

    The index is named ``ATLAS_VECTOR_SEARCH_INDEX_NAME``. Its definition maps
    the ``embedding`` field as a 768-dimension ``knnVector`` with cosine
    similarity and indexes every other field dynamically.

    When an index with that name is already present on the collection the
    function logs that fact and returns without calling
    ``create_search_index``, so setup can be re-run safely.
    """
    # Creating an index whose name is already taken fails, so look first.
    existing = list(
        MONGODB_COLLECTION.list_search_indexes(ATLAS_VECTOR_SEARCH_INDEX_NAME)
    )
    if existing:
        logging.info(
            f"Atlas Search Index '{ATLAS_VECTOR_SEARCH_INDEX_NAME}' already exists; skipping creation"
        )
        return

    # NOTE(review): MongoDB appears to document `knnVector` as deprecated in
    # favour of `vectorSearch`-type indexes - confirm before changing this.
    search_index_model = SearchIndexModel(
        definition={
            "mappings": {
                "dynamic": True,
                "fields": {
                    "embedding": {  # field name is the key, its mapping the value
                        "type": "knnVector",
                        "dimensions": 768,
                        "similarity": "cosine",
                    }
                },
            }
        },
        name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    )

    result = MONGODB_COLLECTION.create_search_index(model=search_index_model)
    logging.info(f"Succesfully creating Atlas Search Index: {result}")


def delete_collection():
    """Drop ``COLLECTION_NAME`` from the ``DB_NAME`` database.

    Returns:
        bool: ``True`` when the drop call completes, ``False`` when it raises.
        A failure is logged and not propagated to the caller.
    """
    try:
        database = client[DB_NAME]
        database.drop_collection(COLLECTION_NAME)
    except Exception as e:
        logging.error(f"Error deleting collection {COLLECTION_NAME}: {str(e)}")
        return False
    else:
        logging.info(f"Successfully deleted collection {COLLECTION_NAME}")
        return True
    
# Run the setup when this cell executes: checks the connection, ensures the
# collection exists, creates the search index and round-trips a test document.
initialize_vector_store()
# create_index() is not called on its own here: initialize_vector_store() already calls it.

In [19]:
# Destructive: drops the whole collection named by COLLECTION_NAME.
# Returns True on success and False when the drop raised (the error is logged).
delete_collection()

True

Load, split, and add the **document** to the vector store

In [21]:
# Fetch the blog post, parsing only the elements with the listed CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Split into chunks of up to 1000 characters with a 200-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
print(len(splits))

# Tag every chunk with the id of the file it came from.
for split in splits:
    split.metadata.update(file_id=0)

# Store the chunks; the ids of the inserted documents are the cell's output.
# To supply explicit ids instead, pass ids=[str(uuid4()) for _ in splits].
vector_store.add_documents(splits)

66


['6808eb8e2f0953a2ef63db9d',
 '6808eb8e2f0953a2ef63db9e',
 '6808eb8e2f0953a2ef63db9f',
 '6808eb8e2f0953a2ef63dba0',
 '6808eb8e2f0953a2ef63dba1',
 '6808eb8e2f0953a2ef63dba2',
 '6808eb8e2f0953a2ef63dba3',
 '6808eb8e2f0953a2ef63dba4',
 '6808eb8e2f0953a2ef63dba5',
 '6808eb8e2f0953a2ef63dba6',
 '6808eb8e2f0953a2ef63dba7',
 '6808eb8e2f0953a2ef63dba8',
 '6808eb8e2f0953a2ef63dba9',
 '6808eb8e2f0953a2ef63dbaa',
 '6808eb8e2f0953a2ef63dbab',
 '6808eb8e2f0953a2ef63dbac',
 '6808eb8e2f0953a2ef63dbad',
 '6808eb8e2f0953a2ef63dbae',
 '6808eb8e2f0953a2ef63dbaf',
 '6808eb8e2f0953a2ef63dbb0',
 '6808eb8e2f0953a2ef63dbb1',
 '6808eb8e2f0953a2ef63dbb2',
 '6808eb8e2f0953a2ef63dbb3',
 '6808eb8e2f0953a2ef63dbb4',
 '6808eb8e2f0953a2ef63dbb5',
 '6808eb8e2f0953a2ef63dbb6',
 '6808eb8e2f0953a2ef63dbb7',
 '6808eb8e2f0953a2ef63dbb8',
 '6808eb8e2f0953a2ef63dbb9',
 '6808eb8e2f0953a2ef63dbba',
 '6808eb8e2f0953a2ef63dbbb',
 '6808eb8e2f0953a2ef63dbbc',
 '6808eb8e2f0953a2ef63dbbd',
 '6808eb8e2f0953a2ef63dbbe',
 '6808eb8e2f09

In [26]:
# List the distinct values of the "sessionId" field in the collection.
# The recorded result is an empty list: no document had a value for that
# field when this cell ran.
# NOTE(review): presumably a check for stored chat-history sessions - confirm
# that this is the collection and field name the history store actually uses.
MONGODB_COLLECTION.distinct(key="sessionId")

[]

Chain

In [4]:
# Chat model that writes the answers, and a retriever over the vector store.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash-001", google_api_key=os.environ.get("GOOGLE_API_KEY")
)
retriever = vector_store.as_retriever()

# System instructions; the retrieved documents are substituted for {context}.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Stuff the retrieved documents into the prompt, then put retrieval in front.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [5]:
# Ask one question through the retrieval chain; no chat history is passed.
response = rag_chain.invoke({"input": "What is Task Decomposition?"})
# NOTE(review): in the recorded output `context` is an empty list, so no
# documents were retrieved for this run - confirm the vector store was
# populated and the search index was ready before relying on this answer.
response

{'input': 'What is Task Decomposition?',
 'context': [],
 'answer': 'Task decomposition is the process of breaking down a large, complex task into smaller, more manageable subtasks. This helps to simplify the task, making it easier to understand, plan, and execute. It also allows for more efficient allocation of resources and responsibilities. '}

# Adding message history


In [6]:

# Answering instructions; the retrieved documents are substituted for {context}.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Instructions for rewriting a follow-up question into a standalone one.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Step 1: a retriever that receives the chat history along with the question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

# Step 2: an answering prompt that also carries the chat history.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Step 3: combine both into the chain used by the following cells.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

Asking questions with chat history

In [7]:
# The history is kept by hand in a plain list for this demonstration.
chat_history = []

# First turn: the history is still empty.
question = "What is Task Decomposition?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})

# Record the exchange so that the next turn can refer back to it.
chat_history += [
    HumanMessage(content=question),
    AIMessage(content=ai_msg_1["answer"]),
]

# Second turn: "it" refers to the topic of the first question.
second_question = "What are common ways of doing it?"
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])

Task decomposition can be done in three common ways: 

1. **LLM with simple prompting:** The LLM is instructed to list steps or subgoals for completing the task. 
2. **Task-specific instructions:** The LLM is given instructions tailored to the specific task, such as "Write a story outline" for writing a novel.
3. **Human inputs:** A human can provide a breakdown of the task, guiding the LLM on how to approach it. 


Here we've gone over how to add application logic for incorporating historical outputs, but we're still manually updating the chat history and inserting it into each input. In a real Q&A application we'll want some way of persisting chat history and some way of automatically inserting and updating it.

In [8]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Maps a session id to its chat history object; kept in process memory only.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for ``session_id``.

    An empty ``ChatMessageHistory`` is created and stored in ``store`` the
    first time a session id is seen; later calls return that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history


# Wrap the RAG chain together with the per-session history lookup above.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [9]:
# First turn of session "abc123"; only the "answer" entry of the result is shown.
conversational_rag_chain.invoke(
    {"input": "What is Task Decomposition?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # get_session_history creates the "abc123" entry in `store` on first use.
)["answer"]

'Task decomposition is the process of breaking down a complex task into smaller, simpler steps. This can be achieved through prompting techniques like Chain of Thought (CoT), which instructs the model to "think step by step," or by using task-specific instructions like "Write a story outline." '

In [10]:
# Follow-up turn with the same session_id as the previous call, so the history
# stored under "abc123" is what lets the chain resolve "it" in this question.
conversational_rag_chain.invoke(
    {"input": "What are common ways of doing it?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

'Task decomposition can be done in a few ways:\n\n1. **LLM with simple prompting:**  The LLM can be prompted with simple questions like "Steps for XYZ.\\n1." or "What are the subgoals for achieving XYZ?". \n2. **Task-specific instructions:** You can provide specific instructions tailored to the task, such as "Write a story outline" for writing a novel.\n3. **Human inputs:** Humans can directly provide the breakdown of tasks, especially when the task is highly complex or requires domain expertise. '

In [13]:
# Display the in-memory session store: one entry per session id, each holding
# the messages exchanged so far.
store

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}), AIMessage(content='Task decomposition is the process of breaking down a complex task into smaller, simpler steps. This can be achieved through prompting techniques like Chain of Thought (CoT), which instructs the model to "think step by step," or by using task-specific instructions like "Write a story outline." ', additional_kwargs={}, response_metadata={}), HumanMessage(content='What are common ways of doing it?', additional_kwargs={}, response_metadata={}), AIMessage(content='Task decomposition can be done in a few ways:\n\n1. **LLM with simple prompting:**  The LLM can be prompted with simple questions like "Steps for XYZ.\\n1." or "What are the subgoals for achieving XYZ?". \n2. **Task-specific instructions:** You can provide specific instructions tailored to the task, such as "Write a story outline" for writing a novel.\n3. **Human inputs

In [14]:
# Print the "abc123" conversation as a transcript: AI messages are labelled
# "AI", every other message type is labelled "User".
for message in store["abc123"].messages:
    prefix = "AI" if isinstance(message, AIMessage) else "User"
    print(f"{prefix}: {message.content}\n")

User: What is Task Decomposition?

AI: Task decomposition is the process of breaking down a complex task into smaller, simpler steps. This can be achieved through prompting techniques like Chain of Thought (CoT), which instructs the model to "think step by step," or by using task-specific instructions like "Write a story outline." 

User: What are common ways of doing it?

AI: Task decomposition can be done in a few ways:

1. **LLM with simple prompting:**  The LLM can be prompted with simple questions like "Steps for XYZ.\n1." or "What are the subgoals for achieving XYZ?". 
2. **Task-specific instructions:** You can provide specific instructions tailored to the task, such as "Write a story outline" for writing a novel.
3. **Human inputs:** Humans can directly provide the breakdown of tasks, especially when the task is highly complex or requires domain expertise. 

