In [21]:
%pip install -U starpoint openai sentence-transformers tokenizers python-dotenv

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting tokenizers
  Obtaining dependency information for tokenizers from https://files.pythonhosted.org/packages/48/eb/9a9404720ced1f26f156b5ffcbdb65946621b1111c2cfdd374ee7e7a938e/tokenizers-0.14.0-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Using cached tokenizers-0.14.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.7 kB)
Note: you may need to restart the kernel to use updated packages.


In [23]:
import os
import openai
from dotenv import load_dotenv
from starpoint.db import Client

load_dotenv()

OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY
STARPOINT_API_KEY = os.environ["STARPOINT_API_KEY"]
starpoint_client = Client(api_key=STARPOINT_API_KEY)
COLLECTION_NAME = 'dnd'

In [37]:

def embed(texts):
    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    return model.encode(texts)

def prompt_openai(prompt):
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )["choices"][0]["message"]["content"]

def query_starpoint(query):
    hypothetical_answer = prompt_openai(f"Answer the following query in less than 30 words: {query}")
    print("hypothetical answer:")
    print(hypothetical_answer)
    entity = prompt_openai(f"What is the subject of the following query, answer with only the subject and nothing else: {hypothetical_answer}")
    print("subject:")
    print(entity)

    query_embedding = embed([hypothetical_answer])[0].tolist()
    relevant_monsters = starpoint_client.query(
        collection_name=COLLECTION_NAME,
        query_embedding=query_embedding,
        sql="SELECT * FROM collection LIMIT 10"
    )
    return relevant_monsters

In [26]:
import json
import requests
from tokenizers import Tokenizer

tokenizer = Tokenizer.from_pretrained("bert-base-cased")

with open('./monster_text.json') as f:
    monster_texts = json.load(f)

    documents_to_upload = []
    embeddings = embed([monster["text"] for monster in monster_texts])

    for index, monster in enumerate(monster_texts):
        documents_to_upload.append({
            "embedding": embeddings[index].tolist(),
            "metadata": monster
        })

    print(documents_to_upload[:10])

    starpoint_client.insert(documents=documents_to_upload, collection_name=COLLECTION_NAME)

[{'embedding': [0.08792936056852341, 0.021912479773163795, 0.02781306952238083, -0.0004003315989393741, 0.11591733992099762, -0.0015614095609635115, 0.049459367990493774, 0.04009472206234932, -0.01830078288912773, 0.010453568771481514, 0.059820301830768585, -0.05280715227127075, 0.007169472519308329, -0.0013736675027757883, -0.010859706439077854, 0.013179056346416473, -0.04156523942947388, 0.0023036482743918896, -0.0950997844338417, 0.02696395106613636, 0.05475945398211479, 0.09896155446767807, -0.039943307638168335, 0.0685221329331398, -0.12566116452217102, -0.05882980301976204, -0.05982360243797302, 0.02094549499452114, 0.022806543856859207, -0.11518490314483643, 0.038549747318029404, -0.011411153711378574, -0.057755839079618454, 0.008965064771473408, 0.0019362810999155045, -0.02141246199607849, -0.007965165190398693, -0.009265422821044922, 0.02784118428826332, -0.025825561955571175, -0.02003360539674759, 0.04986350238323212, -0.03496255353093147, 0.0075513143092393875, -0.0270388331

In [38]:
resp = query_starpoint("where do balors live?")

hypothetical answer:
Balors live in the Abyss.
subject:
Balors


In [39]:
import pprint 

pp = pprint.PrettyPrinter(indent=4)
pp.pprint([result['monster_name'] + ' - ' + result['text'] for result in resp["results"]])

[   "Yeenoghu - Yeenoghu's lair in the Abyss is called the Death Dells, its "
    'barren hills and ravines serving as one great hunting ground, where he '
    "pursues captured mortals in a cruel game. Yeenoghu's lair is a place of "
    'blood and death, populated by {@creature gnoll||gnolls}, {@creature '
    'hyena||hyenas}, and {@creature ghoul||ghouls}, and there are few '
    'structures or signs of civilization on his layer of the Abyss.',
    "Yeenoghu - Yeenoghu's lair in the Abyss is called the Death Dells. Its "
    'barren hills and ravines serve as a hunting ground, where he pursues '
    "captured mortals in a cruel game. Yeenoghu's lair is a place of blood and "
    'death, populated by {@creature gnoll||gnolls}, {@creature hyena||hyenas}, '
    'and {@creature ghoul||ghouls}, and there are few structures or signs of '
    'civilization on his layer of the Abyss.',
    'Riverine - Amphibious\nThe riverine can breathe air and water.\n',
    'Balhannoth - Native to the Sh