# RAG with MongoDB Atlas and Vertex AI Reasoning Engine using LangChain

In this notebook we will cover *how to build a RAG application and deploy it as an endpoint using Reasoning Engine, MongoDB Atlas, and Vertex AI*.

First, we will install all the required dependencies and restart the kernel.

In [49]:
!pip install --upgrade --quiet \
    "google-cloud-aiplatform[langchain,reasoningengine]" \
    cloudpickle==3.0.0 \
    pydantic==2.7.4 \
    requests \
    datasets \
    pymongo \
    langchain \
    langchain-mongodb \
    langchain-google-vertexai \

import sys

# Only when the notebook runs inside Google Colab (the google.colab module is
# already loaded): authenticate the current user through Colab's auth helper.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

import IPython

# Ask the running kernel to shut down with restart=True, presumably so the
# packages installed above are picked up by a fresh interpreter.
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

## Ingest data
To begin the setup, we will load the dataset into MongoDB Atlas. For user convenience, we are using an existing Hugging Face MongoDB embedding dataset. Run the code below to import the *MongoDB/subset_arxiv_papers_with_embeddings* dataset as `ds` and load it into MongoDB Atlas.




## Create a vector search index on the newly created MongoDB collection


**TODO:** add code for creating the Atlas Vector Search index on the collection.

In [1]:
# Google Cloud settings for this notebook; replace the values with your own
# project ID, region and Cloud Storage staging bucket.
PROJECT_ID = "gcp-pov"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
STAGING_BUCKET = "gs://vshanbh01"  # @param {type:"string"}

import vertexai

# Initialise the Vertex AI SDK once with the project, location and staging
# bucket defined above.
vertexai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=STAGING_BUCKET)

In [2]:
from vertexai.preview import reasoning_engines


In [3]:
# Model name passed to the LangchainAgent created further down. Note that
# get_vectors_from_mongodb configures its own ChatVertexAI model ("gemini-pro")
# independently of this value.
model = "gemini-1.5-pro-001"


In [22]:
def get_vectors_from_mongodb(
    query: str
):
    """
    Retrieves vectors from a MongoDB database and uses them to answer a question.

    Runs an MMR vector search against the ``chat-vec`` collection of the
    ``vertexaiApp`` database and feeds the retrieved documents, together with
    the question, to a Vertex AI chat model through a
    ``ConversationalRetrievalChain``.

    Args:
        query: The question to be answered.

    Returns:
        A dictionary containing the response to the question (the output of
        the retrieval chain).
    """
    # Imports stay function-local so the function is self-contained when it is
    # handed to the agent as a tool (presumably intentional, since the agent is
    # later serialized for deployment -- confirm before hoisting them).
    from langchain.chains import ConversationalRetrievalChain
    from langchain_mongodb import MongoDBAtlasVectorSearch
    from langchain_google_vertexai import VertexAIEmbeddings, ChatVertexAI
    from langchain.memory import ConversationBufferWindowMemory
    from pymongo import MongoClient
    import certifi

    from langchain.prompts import PromptTemplate


    prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Summarise the response in 2 sentences.


    {context}


    Question: {question}
    """
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # Add your connection string in srv format below in place of URI.
    # The client is used as a context manager so that it is closed on every
    # exit path; otherwise each tool call would leave an open client behind.
    with MongoClient("URI", tlsCAFile=certifi.where()) as client:
        db = client["vertexaiApp"]

        embeddings = VertexAIEmbeddings(model_name="textembedding-gecko@001")

        # Vector store over the "chat-vec" collection: embeddings are read from
        # the "vec" field and the document text from the "line" field.
        vs = MongoDBAtlasVectorSearch(
            collection=db["chat-vec"],
            embedding=embeddings,
            index_name="vector_index",
            embedding_key="vec",
            text_key="line",
        )

        llm = ChatVertexAI(
            model_name="gemini-pro",
            convert_system_message_to_human=True,
            max_output_tokens=1000,
        )
        retriever = vs.as_retriever(
            search_type="mmr", search_kwargs={"k": 10, "lambda_mult": 0.25}
        )
        # NOTE: the memory object is created anew on every call, so it does not
        # carry conversation history from one invocation to the next.
        memory = ConversationBufferWindowMemory(
            memory_key="chat_history", k=5, return_messages=True
        )
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
            memory=memory,
            combine_docs_chain_kwargs={"prompt": PROMPT},
        )
        # `invoke` replaces the deprecated direct call `chain({...})`.
        response = conversation_chain.invoke({"question": query})

    return response

In [None]:
# Call the retrieval function directly with a sample question, before it is
# wrapped in an agent.
get_vectors_from_mongodb(query="tell me about 04 Examples of collaborations with AWS")

In [44]:
# Build a LangchainAgent for the configured model and register the MongoDB
# retrieval function as its only tool.
agent = reasoning_engines.LangchainAgent(
    model=model,
    tools=[get_vectors_from_mongodb],
)

In [None]:
# Send a sample question to the not-yet-deployed agent object created above.
agent.query(input="tell me about 04 Examples of collaborations with AWS")

In [None]:
# Deploy the agent with ReasoningEngine.create. `requirements` lists the
# packages requested for the deployed agent; it mirrors the packages the tool
# function imports.
remote_agent = reasoning_engines.ReasoningEngine.create(
    agent,
    requirements=[
        "google-cloud-aiplatform[langchain,reasoningengine]",
        "cloudpickle==3.0.0",
        "pydantic==2.7.4",
        "langchain-mongodb",
        "pymongo",
        # Imported directly by get_vectors_from_mongodb (certifi.where()), so
        # it is declared explicitly rather than relied on transitively.
        "certifi",
        "langchain-google-vertexai",
    ],
)