# Ask the docs anything about SuperDuperDB

In [None]:
# Install the pinnacledb package; the leading `!` hands the command to the system shell from the notebook.
!pip install pinnacledb

In [2]:
import os

# Put the OpenAI credential into this process's environment. Replace the
# placeholder with a real key before running the cells that use OpenAI models.
os.environ.update({'OPENAI_API_KEY': '<YOUR-OPENAI-API-KEY>'})

In [3]:
from pinnacledb import pinnacle
from pinnacledb.db.mongodb.query import Collection

# Connection string for the datastore. It is taken from the MONGODB_URI
# environment variable when set; otherwise the default connection is to
# mongomock, which is intended for testing.
mongodb_uri = os.getenv("MONGODB_URI", "mongomock://test")

# Uncomment one of the following lines to use a bespoke MongoDB deployment
# mongodb_uri = "mongodb://localhost:27017"
# mongodb_uri = "mongodb://pinnacle:pinnacle@mongodb:27017/documents"
# mongodb_uri = "mongodb://<user>:<pass>@<mongo_cluster>/<database>"
# mongodb_uri = "mongodb+srv://<username>:<password>@<atlas_cluster>/<database>"

# Super-Duper your Database!
# (`pinnacle` is already imported at the top of this cell, so it is not imported again.)
db = pinnacle(mongodb_uri)

# Query handle for the 'questiondocs' collection used by the cells below.
collection = Collection('questiondocs')

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [None]:
import glob

STRIDE = 5       # stride in numbers of lines
WINDOW = 10       # length of window in numbers of lines

# Collect the lines of every Markdown file one and two levels up from the
# notebook directory into a single flat list, sub-directory files first.
content = []
for path in glob.glob('../*/*.md') + glob.glob('../*.md'):
    # The context manager closes each file as soon as it has been read, and
    # extending in place avoids the repeated list copying of sum(..., []).
    with open(path, encoding='utf-8') as handle:
        content.extend(handle.readlines())

# Slide a WINDOW-line window over the lines in steps of STRIDE, so consecutive
# chunks overlap by WINDOW - STRIDE lines.
# NOTE: readlines() keeps each line's trailing newline, so joining with '\n'
# leaves a blank line between source lines; kept as-is so chunk text is unchanged.
chunks = ['\n'.join(content[i: i + WINDOW]) for i in range(0, len(content), STRIDE)]

In [None]:
from IPython.display import Markdown

# Render one sample chunk (index 2) as Markdown to eyeball the chunking result.
preview_chunk = chunks[2]
Markdown(preview_chunk)

In [None]:
from pinnacledb.container.document import Document

# Wrap every text chunk in a Document under the 'txt' key, then insert them
# all into the collection with a single insert_many query.
documents = [Document({'txt': text}) for text in chunks]
insert_query = collection.insert_many(documents)
db.execute(insert_query)

In [None]:
# Run a find_one query against the collection as a quick check of what was stored.
sample_query = collection.find_one()
db.execute(sample_query)

In [None]:
from pinnacledb.container.listener import Listener
from pinnacledb.container.vector_index import VectorIndex
from pinnacledb.ext.openai.model import OpenAIEmbedding

# Embedding model applied to the 'txt' field of the documents selected below.
embedding_model = OpenAIEmbedding(model='text-embedding-ada-002')

# Listener tying the embedding model to the 'txt' key over the whole collection.
txt_listener = Listener(
    model=embedding_model,
    key='txt',
    select=collection.find(),
)

# Register the vector index under the identifier 'my-index'.
db.add(VectorIndex(identifier='my-index', indexing_listener=txt_listener))

In [None]:
from pinnacledb.ext.openai.model import OpenAIChatCompletion

# Chat-completion model with a prompt template. The `{context}` placeholder
# marks where retrieved snippets go; the template ends just before the question.
chat = OpenAIChatCompletion(
    model='gpt-3.5-turbo',
    prompt=(
        # Typo fix: the original text read "aboout".
        'Use the following description and code-snippets about SuperDuperDB to answer this question about SuperDuperDB\n'
        'Do not use any other information you might have learned about other python packages\n'
        'Only base your answer on the code-snippets retrieved\n'
        '{context}\n\n'
        'Here\'s the question:\n'
    ),
)

# Register the chat model with the database.
db.add(chat)

# Print what db.show reports for the 'model' component type.
print(db.show('model'))

In [None]:
# Look up the 'gpt-3.5-turbo' entry among the registered 'model' components.
model_identifier = 'gpt-3.5-turbo'
db.show('model', model_identifier)

In [None]:
from IPython.display import display, Markdown
from pinnacledb.container.document import Document


q = 'Can you give me a code-snippet to set up a `VectorIndex`?'

# Context query: documents like the question text, looked up through the
# 'my-index' vector index with n=5.
question_doc = Document({'txt': q})
context_query = collection.like(question_doc, vector_index='my-index', n=5).find()

# Ask the chat model the question; the 'txt' field of the selected documents
# is passed as the context key.
output, context = db.predict(
    model='gpt-3.5-turbo',
    input=q,
    context_select=context_query,
    context_key='txt',
)

# Render the answer text as Markdown.
Markdown(output.content)