In [None]:
%pip install -U starpoint openai sentence-transformers tokenizers

In [None]:
import os

import openai
from starpoint.db import Client

# Credentials are read from the environment so they never live in the notebook;
# a missing variable raises KeyError right here instead of failing later.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY
STARPOINT_API_KEY = os.environ["STARPOINT_API_KEY"]
starpoint_client = Client(api_key=STARPOINT_API_KEY)

# Name of the Starpoint collection used for both inserts and queries.
COLLECTION_NAME = 'dnd'

In [None]:

_EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
# Holds the loaded model so repeated embed() calls reuse it instead of reloading.
_embedding_model_cache = {}


def _get_embedding_model():
    """Return the shared SentenceTransformer, constructing it on first use only."""
    if _EMBEDDING_MODEL_NAME not in _embedding_model_cache:
        # Imported lazily so that defining this function does not itself
        # require loading the sentence_transformers library.
        from sentence_transformers import SentenceTransformer
        _embedding_model_cache[_EMBEDDING_MODEL_NAME] = SentenceTransformer(_EMBEDDING_MODEL_NAME)
    return _embedding_model_cache[_EMBEDDING_MODEL_NAME]


def embed(texts):
    """Encode a list of strings with the all-MiniLM-L6-v2 sentence-transformer.

    The model is constructed once and reused across calls; constructing it on
    every call repeats the model load for each query.

    Args:
        texts: list of strings to encode.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``texts``; callers
        in this notebook index the result per input and call ``.tolist()`` on
        each entry.
    """
    return _get_embedding_model().encode(texts)

def prompt_openai(prompt):
    """Send ``prompt`` as a single user message to gpt-3.5-turbo.

    Args:
        prompt: text placed in the ``content`` of the one user message.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    user_message = {"role": "user", "content": prompt}
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[user_message],
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

def query_starpoint(query):
    """Query the Starpoint collection using an LLM-drafted answer as the probe.

    The question is first answered by ``prompt_openai``; that drafted answer,
    not the question itself, is embedded and used as the query vector.

    Args:
        query: the natural-language question.

    Returns:
        The response of ``starpoint_client.query`` for ``COLLECTION_NAME``.
    """
    answer_prompt = f"Answer the following question in less than 80 words: {query}"
    drafted_answer = prompt_openai(answer_prompt)

    # embed() takes a batch, so wrap the single answer and take its only row.
    answer_vectors = embed([drafted_answer])

    return starpoint_client.query(
        collection_name=COLLECTION_NAME,
        query_embedding=answer_vectors[0].tolist(),
        sql="SELECT * FROM collection LIMIT 10",
    )

In [None]:
import json
import requests
from tokenizers import Tokenizer

# NOTE(review): `tokenizer` is not referenced by any cell visible here —
# confirm whether it is still needed before removing it.
tokenizer = Tokenizer.from_pretrained("bert-base-cased")

# Only the JSON parse needs the file handle, so the file is closed before the
# slower embedding and upload steps run.
with open('./monster_text.json') as f:
    monster_texts = json.load(f)

# Fail loudly on an empty dataset rather than uploading nothing.
if not monster_texts:
    raise ValueError("./monster_text.json contained no monsters; nothing to embed or upload")

# Embed every monster's text in a single batch.
embeddings = embed([monster["text"] for monster in monster_texts])

# Pair each embedding with the record it was computed from; the full record is
# stored as the document's metadata.
documents_to_upload = [
    {"embedding": embedding.tolist(), "metadata": monster}
    for embedding, monster in zip(embeddings, monster_texts)
]

# Show a short summary instead of dumping whole embedding vectors.
print(f"Prepared {len(documents_to_upload)} documents; first document metadata:")
print(documents_to_upload[0]["metadata"])

starpoint_client.insert(documents=documents_to_upload, collection_name=COLLECTION_NAME)

In [None]:
# Example question run end to end: draft an answer, embed it, query Starpoint.
question = "where are Balors found?"
resp = query_starpoint(question)
print(resp)