In [1]:
import getpass, os, pymongo, pprint
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pymongo import MongoClient
from pymongo.operations import SearchIndexModel



In [2]:
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
ATLAS_CONNECTION_STRING = getpass.getpass("MongoDB Atlas SRV Connection String:")

In [3]:
# Connect to your Atlas cluster
client = MongoClient(ATLAS_CONNECTION_STRING)

# Define collection and index name
db_name = "langchain_db"
collection_name = "test"
atlas_collection = client[db_name][collection_name]
vector_search_index = "vector_index"

In [4]:
# Load the PDF
loader = PyPDFLoader("https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP")
data = loader.load()

# Split PDF into documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
docs = text_splitter.split_documents(data)

# Print the first document
docs[0]

Document(metadata={'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 0}, page_content='Mong oDB Atlas Best P racticesJanuary 20 19A MongoD B White P aper')

In [5]:
# Create the vector store
vector_store = MongoDBAtlasVectorSearch.from_documents(
    documents = docs,
    embedding = OpenAIEmbeddings(disallowed_special=()),
    collection = atlas_collection,
    index_name = vector_search_index
)

In [9]:
# Create your index model, then create the search index
search_index_model = SearchIndexModel(
    definition ={
        "fields": [
            {
                "type": "vector",
                "path": "embedding",
                "numDimensions": 1536,
                "similarity": "cosine"
            },
            {
                "type": "filter",
                "path": "page",
            }
        ]
    },
    name="vector_index",
    type="vectorSearch"
)

atlas_collection.create_search_index(model=search_index_model)

'vector_index'

### Semantic Search

In [10]:
query = "MongoDB Atlas secutiry"
results = vector_store.similarity_search(query)

pprint.pprint(results)

[Document(metadata={'_id': '66ece73cdf3f150de153174d', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 17}, page_content='To ensure a secure system right out of the b ox,\nauthentication and I P Address whitelisting are\nautomatically enabled.\nReview the security section of the MongoD B Atlas'),
 Document(metadata={'_id': '66ece73cdf3f150de1531718', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 15}, page_content='MongoD B Atlas team are also monitoring the underlying\ninfrastructure, ensuring that it is always in a healthy state.\nApplication L ogs And Database L ogs'),
 Document(metadata={'_id': '66ece73cdf3f150de15316fb', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 13}, page_content='MongoD B.\nMongoD B Atlas incorporates best practices to help keep\nmanaged databases healthy and optimized. T hey ensure\noperational continuity by converting comple x manual tasks

### Semantic Search with Score

In [12]:
query = "MongoDB Atlas secutiry"
results = vector_store.similarity_search_with_score(
    query = query, k = 3
)

pprint.pprint(results)

[(Document(metadata={'_id': '66ece73cdf3f150de153174d', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 17}, page_content='To ensure a secure system right out of the b ox,\nauthentication and I P Address whitelisting are\nautomatically enabled.\nReview the security section of the MongoD B Atlas'),
  0.9355283379554749),
 (Document(metadata={'_id': '66ece73cdf3f150de1531718', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 15}, page_content='MongoD B Atlas team are also monitoring the underlying\ninfrastructure, ensuring that it is always in a healthy state.\nApplication L ogs And Database L ogs'),
  0.9292968511581421),
 (Document(metadata={'_id': '66ece73cdf3f150de15316fb', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 13}, page_content='MongoD B.\nMongoD B Atlas incorporates best practices to help keep\nmanaged databases healthy and optimized. T hey ensure\noperation

### Semantic Search with Filter

In [13]:
query = "MongoDB Atlas secutiry"

results = vector_store.similarity_search_with_score(
    query = query,
    k = 3,
    pre_filter = { "page": { "$eq": 17 } }
)

pprint.pprint(results)

[(Document(metadata={'_id': '66ece73cdf3f150de153174d', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 17}, page_content='To ensure a secure system right out of the b ox,\nauthentication and I P Address whitelisting are\nautomatically enabled.\nReview the security section of the MongoD B Atlas'),
  0.9355283379554749),
 (Document(metadata={'_id': '66ece73cdf3f150de1531748', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 17}, page_content='Security\nAs with all software, MongoD B administrators must\nconsider security and risk e xposure for a MongoD B\ndeployment. T here are no magic solutions for risk'),
  0.9219629764556885),
 (Document(metadata={'_id': '66ece73cdf3f150de153174a', 'source': 'https://query.prod.cms.rt.microsoft.com/cms/api/am/binary/RE4HkJP', 'page': 17}, page_content='number of diff erent methods for managing risk and\nreducing risk e xposure.\nMongoD B Atlas f eatures e xtensive capabilit

### Basic RAG

This example does the following:

- Instantiates Atlas Vector Search as a retriever to query for similar documents, including the optional k parameter to search for only the 10 most relevant documents.

- Defines a LangChain prompt template to instruct the LLM to use these documents as context for your query. LangChain passes these documents to the {context} input variable and your query to the {question} variable.

- Constructs a chain that specifies the following:

  - Atlas Vector Search as the retriever to search for documents that are used as context by the LLM.

  - The prompt template that you constructed.

  - OpenAI's chat model as the LLM used to generate a context-aware response.

- Prompts the chain with a sample query about Atlas security recommendations.

- Returns the LLM's response and the documents used as context. The generated response might vary.


In [16]:
# Instantiate the Vector Search as a Retriever
retriever = vector_store.as_retriever(
    search_type = "similarity",
    search_kwargs = { "k": 10 }
)

# Define a prompt template
template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
{context}
Question: {question}
"""
custom_rag_prompt = PromptTemplate.from_template(template)

llm = ChatOpenAI(model="gpt-4o-mini")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# construct a chain to answer questions on your data
rag_chain = (
    { "context": retriever | format_docs, "question": RunnablePassthrough() }
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# Prompt the chain with a query
question = "How can I secure my MongoDB Atlas cluster?"
answer = rag_chain.invoke(question)

print("Question:" + question)
print("Answer:" + answer)

# return source documents
documents = retriever.invoke(question)
print("\nSource documents:")
pprint.pprint(documents)









Question:How can I secure my MongoDB Atlas cluster?
Answer:To secure your MongoDB Atlas cluster, you can take the following steps:

1. **Enable Authentication**: Ensure that authentication is enabled, which is automatically done by MongoDB Atlas.

2. **IP Address Whitelisting**: Use IP address whitelisting to restrict access to your cluster, which is also automatically enabled.

3. **Encryption**: Utilize the built-in encryption of data at rest with encrypted storage volumes. Optionally, configure an additional layer of encryption on your data at rest using MongoDB.

4. **Monitor Logs**: Regularly review application logs and database logs for any suspicious activity.

5. **Replica Sets**: Set up replica sets with a minimum of 3 nodes to enhance protection against database downtime and ensure high availability.

6. **Follow Best Practices**: Incorporate best practices for managing and securing your databases as recommended by MongoDB Atlas.

7. **Continuous Monitoring**: Take advantage 

### RAG with Filtering

This example does the following:

- Instantiates Atlas Vector Search as a retriever to query for similar documents, including the following optional parameters:

  - `k` to search for only the `10` most relevant documents.

  - `score_threshold` to use only documents with a relevance score above `0.75`.

    - Note
    This parameter refers to a relevance score that Langchain uses to normalize your results, and not the relevance score used in Atlas Search queries. To use Atlas Search scores in your RAG implementation, define a custom retriever that uses the similarity_search_with_score method and filters by the Atlas Search score.

  - `pre_filter` to filter on the  `page` field for documents that appear on page `17` only.

- Defines a LangChain prompt template to instruct the LLM to use these documents as context for your query. LangChain passes these documents to the  `{context}` input variable and your query to the `{question}` variable.

- Constructs a chain that specifies the following:

  - Atlas Vector Search as the retriever to search for documents that are used as context by the LLM.

  - The prompt template that you constructed.

  - OpenAI's chat model as the LLM used to generate a context-aware response.

- Prompts the chain with a sample query about Atlas security recommendations.

- Returns the LLM's response and the documents used as context. The generated response might vary.


In [23]:
# Instantiate Atlas Vector Search as a retriever
retriever = vector_store.as_retriever(
   search_type = "similarity_score_threshold",
   search_kwargs = {
      "k": 10,
      "score_threshold": 0.75,
      "pre_filter": { "page": { "$eq": 17 } }
   }
)

# Define a prompt template
template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
"""
custom_rag_prompt = PromptTemplate.from_template(template)

llm = ChatOpenAI(model="gpt-4o-mini")

def format_docs(docs):
   return "\n\n".join(doc.page_content for doc in docs)

# Construct a chain to answer questions on your data
rag_chain = (
   { "context": retriever | format_docs, "question": RunnablePassthrough()}
   | custom_rag_prompt
   | llm
   | StrOutputParser()
)

# Prompt the chain
question = "How can I secure my MongoDB Atlas cluster?"
answer = rag_chain.invoke(question)

print("Question: " + question)
print("Answer: " + answer)

# Return source documents
documents = retriever.invoke(question)
print("\nSource documents:")
pprint.pprint(documents)

No relevant docs were retrieved using the relevance score threshold 0.75


Question: How can I secure my MongoDB Atlas cluster?
Answer: To secure your MongoDB Atlas cluster, consider implementing the following best practices:

1. **Enable IP Whitelisting**: Restrict access to your cluster by specifying which IP addresses can connect. You can add specific IP addresses or ranges in the Network Access section of your Atlas project.

2. **Use Database Users with Role-Based Access Control**: Create database users with specific roles that grant only the necessary permissions. Avoid using the default admin user for application access.

3. **Enable Authentication**: Ensure that authentication is enabled for your cluster. MongoDB Atlas enables authentication by default, but it's important to configure it correctly.

4. **Use TLS/SSL for Data Encryption in Transit**: Ensure that connections to your MongoDB Atlas cluster use TLS/SSL to encrypt data in transit.

5. **Enable Encryption at Rest**: MongoDB Atlas provides built-in encryption for data at rest. Make sure this 

No relevant docs were retrieved using the relevance score threshold 0.75



Source documents:
[]
