# MongoDB Atlas vector-search with SuperDuperDB

In [None]:
!pip install pinnacledb

In [None]:
import os

# Expose the OpenAI API key through the environment (the OpenAI integration
# used later is presumed to read OPENAI_API_KEY -- confirm against its docs).
# `setdefault` leaves a key that is already exported untouched, so running
# this cell unedited no longer replaces a valid key with the placeholder.
# Substitute your own key for the placeholder only if none is exported.
os.environ.setdefault('OPENAI_API_KEY', '<YOUR-OPENAI-KEY>')

In [1]:
import os
from pinnacledb import pinnacle

# Connection string: the MONGODB_URI environment variable wins when it is
# set; otherwise the mongomock test connection is used.
mongodb_uri = os.environ.get("MONGODB_URI", "mongomock://test")

# To point at a specific MongoDB deployment instead, uncomment one of these:
# mongodb_uri = "mongodb://localhost:27017"
# mongodb_uri = "mongodb://pinnacle:pinnacle@mongodb:27017/documents"
# mongodb_uri = "mongodb://<user>:<pass>@<mongo_cluster>/<database>"
# mongodb_uri = "mongodb+srv://<username>:<password>@<atlas_cluster>/<database>"

# Wrap the database behind the chosen URI; `db` is the handle every later
# cell works through.
db = pinnacle(mongodb_uri)



In [2]:
!curl -O https://pinnacledb-public.s3.eu-west-1.amazonaws.com/pymongo.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  120k  100  120k    0     0   191k      0 --:--:-- --:--:-- --:--:--  192k


In [3]:
import json
from pinnacledb.db.mongodb.query import Collection
from pinnacledb.container.document import Document as D

# Read the downloaded records. JSON text is UTF-8, so the encoding is stated
# explicitly rather than left to the platform's default text encoding.
with open('pymongo.json', encoding='utf-8') as f:
    data = json.load(f)

# Wrap each record in a Document and insert them all into the 'documents'
# collection with a single insert_many query.
db.execute(Collection('documents').insert_many([D(r) for r in data]))

INFO:root:found 0 uris
INFO:root:Adding model text-embedding-ada-002 to db
INFO:root:Done.


(<pymongo.results.InsertManyResult at 0x111402830>,
 TaskWorkflow(database=<pinnacledb.db.base.db.DB object at 0x156aee710>, G=<networkx.classes.digraph.DiGraph object at 0x15739e210>))

In [4]:
from pinnacledb.container.vector_index import VectorIndex
from pinnacledb.container.listener import Listener
# NOTE(review): `array` is not referenced in this cell; kept in case a later
# cell relies on it being imported here.
from pinnacledb.ext.numpy.array import array
from pinnacledb.ext.openai.model import OpenAIEmbedding

# OpenAI embedding model used to vectorise the documents.
model = OpenAIEmbedding(model='text-embedding-ada-002')

# Register a vector index named 'pymongo-docs'. Its indexing listener applies
# `model` to the 'value' field of the documents selected from the 'documents'
# collection.
db.add(
    VectorIndex(
        # Plain literal: the original f-string had no placeholders.
        identifier='pymongo-docs',
        indexing_listener=Listener(
            model=model,
            key='value',
            select=Collection('documents').find(),
            # presumably forwarded to the model's predict call to bound the
            # size of each processed chunk -- TODO confirm
            predict_kwargs={'max_chunk_size': 1000},
        ),
    )
)

INFO:root:Adding model text-embedding-ada-002 to db
INFO:root:Done.
527it [00:00, 1057.13it/s]


Computing chunk 0/0


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:11<00:00,  1.84s/it]


[]

In [5]:
from pinnacledb.db.mongodb.query import Collection
from pinnacledb.container.document import Document as D
# Import only the names this cell uses rather than `from IPython.display import *`,
# so it is clear where `display` and `Markdown` come from.
from IPython.display import Markdown, display

query = 'Query the database'

# Search the 'pymongo-docs' vector index with the query text placed in the
# 'value' field, asking for n=5 results, then run find() on that selection.
result = db.execute(
    Collection('documents')
        .like(D({'value': query}), vector_index='pymongo-docs', n=5)
        .find()
)

for r in result:
    # Heading is "<parent>.<res>" when the record has a truthy 'parent',
    # otherwise just "<res>".
    parent_prefix = r['parent'] + '.' if r['parent'] else ''
    display(Markdown(f'### `{parent_prefix}{r["res"]}`'))
    # The record's 'value' text is rendered as Markdown below the heading.
    display(Markdown(r['value']))

### `c[name] || c.name.find`


Query the database.

The filter argument is a query document that all results
must match. For example:

```pycon
>>> db

### `pymongo.monitoring.CommandStartedEvent.database_name`


The name of the database this command was run against.



### `db[collection_name] || db.collection_name.aggregate`


Perform a database-level aggregation.

See the [aggregation pipeline](https://mongodb.com/docs/manual/reference/operato

### `pymongo.change_stream.ChangeStream.alive`


Does this cursor have the potential to return more data?



### `db[collection_name] || db.collection_name.command`


