# MongoDB Reader
This notebook demonstrates our MongoDB data connector, `SimpleMongoReader`.

In [None]:
import logging
import sys

# Send INFO-and-above log records to stdout so they show up in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# basicConfig() does nothing when the root logger already has handlers, so set
# the level explicitly and attach a stdout handler only if one is missing.
# Adding a handler unconditionally would print every record twice (and once
# more for each time this cell is re-run).
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
has_stdout_handler = any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in root_logger.handlers
)
if not has_stdout_handler:
    root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))

In [1]:
from llama_index import ListIndex, SimpleMongoReader
from IPython.display import Markdown, display
import os

In [None]:
# Connection settings for the MongoDB instance -- fill in the placeholders.
host = "<host>"
port = "<port>"

# Database and collection to read from.
db_name = "<db_name>"
collection_name = "<collection_name>"

# Document fields to extract, and the filter that is passed into
# db.collection.find().
field_names = ["text"]
query_dict = {}

reader = SimpleMongoReader(host, port)
documents = reader.load_data(
    db_name,
    collection_name,
    field_names,
    query_dict=query_dict,
)

In [None]:
# Build a list index over the documents loaded from MongoDB.
index = ListIndex.from_documents(documents=documents)

In [None]:
# Set the logging level to DEBUG for more detailed output while querying.
query_engine = index.as_query_engine()
query_text = "<query_text>"
response = query_engine.query(query_text)

In [None]:
# Render the response as bold Markdown in the notebook output.
bold_response = f"<b>{response}</b>"
display(Markdown(bold_response))

## MongoDB Atlas

In [2]:
# Provide URI to constructor, or use environment variable
import pymongo
from llama_index.vector_stores.mongodb import MongoDBVectorStore
from llama_index.indices.vector_store.base import VectorStoreIndex
from llama_index.storage.storage_context import StorageContext
from llama_index.readers.file.base import SimpleDirectoryReader



In [9]:
# Atlas connection string -- replace the placeholders with real values, or read
# it from the environment instead: mongo_uri = os.environ["MONGO_URI"]
mongo_uri = "mongodb+srv://<username>:<password>@<host>?retryWrites=true&w=majority"
mongodb_client = pymongo.MongoClient(mongo_uri)

# Back the index's storage context with a MongoDB vector store.
store = MongoDBVectorStore(mongodb_client)
storage_context = StorageContext.from_defaults(vector_store=store)

# Load the Uber 10-K PDF and build a vector index over it using that storage.
uber_pdf_reader = SimpleDirectoryReader(input_files=["../data/10k/uber_2021.pdf"])
uber_docs = uber_pdf_reader.load_data()
index = VectorStoreIndex.from_documents(uber_docs, storage_context=storage_context)

In [10]:
# Query the Atlas-backed index and render the answer as bold Markdown.
atlas_query_engine = index.as_query_engine()
response = atlas_query_engine.query("What was Uber's revenue?")
display(Markdown(f"<b>{response}</b>"))

<b>
Uber's revenue for 2021 was $17,455 million.</b>

In [11]:
from llama_index.response.schema import Response

# Number of records in the collection before anything is deleted.
collection = store._collection
print(collection.count_documents({}))

# Take a ref_doc_id from the first source node of the previous response; a
# result that is not a Response is converted with get_response() first.
if isinstance(response, Response):
    typed_response = response
else:
    typed_response = response.get_response()
ref_doc_id = typed_response.source_nodes[0].node.ref_doc_id
print(collection.count_documents({"metadata.ref_doc_id": ref_doc_id}))

# Exercise the store's delete() and print the collection size afterwards.
if ref_doc_id:
    store.delete(ref_doc_id)
    print(collection.count_documents({}))

4454
1
4453
