In [1]:
%pip install -U starpoint openai sentence-transformers tokenizers python-dotenv

Collecting starpoint
  Obtaining dependency information for starpoint from https://files.pythonhosted.org/packages/68/de/03a3d84fa69b27e817fd9336af9476899113d48981116b470d130b358d27/starpoint-0.3.1-py3-none-any.whl.metadata
  Downloading starpoint-0.3.1-py3-none-any.whl.metadata (1.5 kB)
Collecting openai
  Obtaining dependency information for openai from https://files.pythonhosted.org/packages/ae/59/911d6e5f1d7514d79c527067643376cddcf4cb8d1728e599b3b03ab51c69/openai-0.28.0-py3-none-any.whl.metadata
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Collecting tokenizers
  Obtaining dependency information for tokenizers from https://files.pythonhosted.org/packages/48/eb/9a9404720ced1f26f156b5ffcbdb65946621b1111c2cfdd374ee7e7a938e/tokenizers-0.14.0-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading tokenizers-0.14.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.7 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Downloading starpoin

In [3]:
import os
import openai
from dotenv import load_dotenv
from starpoint.db import Client

# Load environment variables via python-dotenv before the lookups below
# (by default it reads a local .env file, so keys stay out of the notebook).
load_dotenv()

# Indexing os.environ raises KeyError right here if a key is missing,
# instead of failing later inside an API call.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY  # module-level setting used by prompt_openai()
STARPOINT_API_KEY = os.environ["STARPOINT_API_KEY"]
starpoint_client = Client(api_key=STARPOINT_API_KEY)
# Name of the Starpoint collection that the upload and query cells both use.
COLLECTION_NAME = 'dnd'

In [None]:

_EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
_embedding_model_cache = {}


def _get_embedding_model(model_name=_EMBEDDING_MODEL_NAME):
    """Return the SentenceTransformer for `model_name`, building it at most once."""
    if model_name not in _embedding_model_cache:
        # Imported lazily so that merely defining these helpers does not pull
        # in the sentence-transformers package.
        from sentence_transformers import SentenceTransformer
        _embedding_model_cache[model_name] = SentenceTransformer(model_name)
    return _embedding_model_cache[model_name]


def embed(texts):
    """Embed a batch of strings with the all-MiniLM-L6-v2 sentence-transformer.

    The model is constructed on the first call and reused afterwards, so the
    bulk upload and every later query share one loaded model instead of
    reloading it on each call.

    Args:
        texts: list of strings to embed.

    Returns:
        Whatever ``model.encode(texts)`` returns, one embedding per input text.
        Callers in this notebook index the result and call ``.tolist()`` on
        each row.
    """
    return _get_embedding_model().encode(texts)

def prompt_openai(prompt):
    """Send `prompt` to gpt-3.5-turbo as a single user message and return the reply text.

    Args:
        prompt: the text to send as the user message.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    conversation = [{"role": "user", "content": prompt}]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

def query_starpoint(query):
    """Look up documents in the Starpoint collection that relate to a question.

    Rather than embedding the question itself, this first asks OpenAI for a
    short (under 80 words) answer to it, embeds that answer, and uses the
    resulting vector as the query embedding.

    Args:
        query: the natural-language question.

    Returns:
        The response of ``starpoint_client.query`` for the collection named by
        COLLECTION_NAME, with the SQL capped at 10 rows.
    """
    draft_prompt = f"Answer the following question in less than 80 words: {query}"
    draft_answer = prompt_openai(draft_prompt)

    # embed() works on batches, so wrap the single answer and unwrap its row.
    answer_vector = embed([draft_answer])[0]
    query_vector = answer_vector.tolist()

    return starpoint_client.query(
        collection_name=COLLECTION_NAME,
        query_embedding=query_vector,
        sql="SELECT * FROM collection LIMIT 10",
    )

In [None]:
import json
import requests
from tokenizers import Tokenizer

# NOTE(review): `tokenizer` is not used by any cell visible here; it is kept in
# case a later cell relies on it.
tokenizer = Tokenizer.from_pretrained("bert-base-cased")

# Read the monster records and close the file straight away; the slow embedding
# and upload steps below do not need it open.
with open('./monster_text.json', encoding='utf-8') as f:
    monster_texts = json.load(f)

# The loaded data used to be overwritten with an empty list, so nothing was
# ever embedded or uploaded. Fail loudly if the file itself has no records.
if not monster_texts:
    raise ValueError("monster_text.json contained no monsters; nothing to embed or upload")

# One batched embed() call for all monsters; each record must have a "text" field.
embeddings = embed([monster["text"] for monster in monster_texts])

# Pair each embedding with its full source record, which is stored as metadata.
documents_to_upload = [
    {"embedding": embedding.tolist(), "metadata": monster}
    for embedding, monster in zip(embeddings, monster_texts)
]

# Print a short summary rather than dumping raw embedding vectors into the output.
print(f"Uploading {len(documents_to_upload)} documents to collection '{COLLECTION_NAME}'")
print(documents_to_upload[0]["metadata"])

starpoint_client.insert(documents=documents_to_upload, collection_name=COLLECTION_NAME)

In [None]:
# End-to-end check: run one question through query_starpoint() and print the raw response.
question = "where are Balors found?"
resp = query_starpoint(question)
print(resp)