In [44]:
%pip install -U starpoint openai sentence-transformers tokenizers python-dotenv

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting tokenizers
  Obtaining dependency information for tokenizers from https://files.pythonhosted.org/packages/57/bd/45b5ef6b088880779f70acf60027f7043ca5fa1b98f4a4345cf3aea09044/tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Using cached tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Note: you may need to restart the kernel to use updated packages.


In [99]:
import os
import openai
from dotenv import load_dotenv
from starpoint.db import Client

load_dotenv()

OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY
STARPOINT_API_KEY = os.environ["STARPOINT_API_KEY"]
starpoint_client = Client(api_key=STARPOINT_API_KEY)
COLLECTION_NAME = 'dnd'
DESCRIPTION_COLLECTION_NAME = 'dnd-descriptions'
AVERAGED_COLLECTION_NAME = 'dnd-averaged'

In [123]:

def embed(texts):
    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    return model.encode(texts)

def prompt_openai(prompt):
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )["choices"][0]["message"]["content"]

def get_information_retrieval_prompt(query):
    example_query = "What is the saving throw needed to overcome a mud hulks's enveloping slam?"
    example_result = "Information detailing the statistics and conditions relating to the mud hulk's enveloping slam attack."
    example_query2 = "How does sunlight effect kobolds?"
    example_result2 = "Information about the effects of sunlight on kobolds."
    example_query3 = "What is an aerosaur?"
    example_result3 = "A general description of what an aerosaur is."

    return f"""
### Instruction:
You are a helpful assistant that assists with retrieving information. Describe the information that is required to answer the following query relating to monsters.
Do not answer the following query, just describe the information that would be required to answer the query. Do not provide an answer, only a description. 
Answer in a few sentences. Be concise.

### Input:
{example_query}

### Response:
{example_result}


### Input:
{example_query2}

### Response:
{example_result2}

### Input:
{example_query3}

### Response:
{example_result3}

### Input:
{query}

### Response:

            """

def query_starpoint(query, use_descriptor = True, use_hyde = False):
    subjects = prompt_openai(f"What is the following query about? Answer with only the nouns and nothing else. Make a comma separated list. Be concise.\nQUERY: {query}\nNOUNS:")
    print("subject:")
    subjects = subjects.split(",")
    print(subjects)

    query_embedding = []
    if use_descriptor:
        prompt = get_information_retrieval_prompt(query)
        information_descriptor = prompt_openai(prompt)
        query_embedding = embed([information_descriptor])[0].tolist()
        print("information descriptor:")
        print(information_descriptor)
    elif use_hyde:
        hypothetical_answer = prompt_openai(f"You are an information retrieval expert. Please write me an example response for the following query in 30 words or less. Be concise.\nQUERY: {query}\nRESPONSE:")
        query_embedding = embed([hypothetical_answer])[0].tolist()
        print("hypothetical answer:")
        print(hypothetical_answer)
    else:
        query_embedding = embed([query])[0].tolist()

    collection_name = DESCRIPTION_COLLECTION_NAME if use_descriptor else COLLECTION_NAME
    print("collection_name:")
    print(collection_name)
    print(query_embedding)

    relevant_monsters = starpoint_client.query(
        collection_name=collection_name,
        query_embedding=query_embedding,
        sql="SELECT * FROM collection LIMIT 5",
        text_search_query=subjects
    )
    return relevant_monsters['results']


In [101]:
import json

with open('./monster_text_enriched.json') as f:
    monster_texts = json.load(f)

documents_to_upload = []
descriptions_to_upload = []
embeddings = embed([monster["text"] for monster in monster_texts])
description_embeddings = embed([monster["information_description"].strip() for monster in monster_texts])

for index, monster in enumerate(monster_texts):
    documents_to_upload.append({
        "embedding": embeddings[index].tolist(),
        "metadata": monster
    })

for index, monster in enumerate(monster_texts):
    descriptions_to_upload.append({
        "embedding": description_embeddings[index].tolist(),
        "metadata": monster
    })

print(documents_to_upload[:10])
print(descriptions_to_upload[:10])



[{'embedding': [-0.00767302792519331, 0.09029876440763474, 0.016280265524983406, 0.034027617424726486, -0.06770571321249008, -0.05749347060918808, 0.08869515359401703, -0.004772051237523556, 0.010783006437122822, 0.04090312868356705, 0.022324634715914726, -0.05183158814907074, -0.07775594294071198, 0.054341062903404236, -0.031153596937656403, 0.040392689406871796, 0.019327089190483093, 0.05059783160686493, 0.03171854838728905, 0.0801968052983284, -0.011472939513623714, 0.006411905400454998, -0.03707345575094223, 0.07620230317115784, -0.05939263477921486, -0.08103334903717041, -0.11662179976701736, 0.03617577627301216, 0.03235242888331413, -0.00805383175611496, -0.032861895859241486, -4.7729103243909776e-05, -0.06474066525697708, -0.011835134588181973, -0.022598061710596085, 0.005882310215383768, 0.05307728424668312, -0.03724130243062973, 0.010039076209068298, 0.015071186237037182, 0.0060347565449774265, 0.0223959069699049, -0.03177521377801895, 0.02786925807595253, -0.03270523995161056

In [102]:
document_dimensions = len(documents_to_upload[0]["embedding"])
description_dimensions = len(descriptions_to_upload[0]["embedding"])
starpoint_client.create_collection(COLLECTION_NAME, document_dimensions)
starpoint_client.create_collection(DESCRIPTION_COLLECTION_NAME, description_dimensions)

{'id': '50ba9395-ca66-42f3-903d-7b955e8178c0',
 'name': 'dnd-descriptions',
 'dimensionality': 384}

In [103]:
starpoint_client.insert(documents=documents_to_upload, collection_name=COLLECTION_NAME)
starpoint_client.insert(documents=descriptions_to_upload, collection_name=DESCRIPTION_COLLECTION_NAME)

{'collection_id': '50ba9395-ca66-42f3-903d-7b955e8178c0',
 'documents': [{'id': '5x721ig9es8w'},
  {'id': '7mo07pgbhufa'},
  {'id': 'g20xz8qxbeyf'},
  {'id': 'lbp5ieh0al5r'},
  {'id': '8ob7rbu6suiy'},
  {'id': 'knjh1xqfvjjv'},
  {'id': '34d1b5jdvc3k'},
  {'id': '5kydlhhi2g21'},
  {'id': 'ctk7idlec5tc'},
  {'id': 'qt21u5jt3ujn'},
  {'id': 'e4qk13tz2ctn'},
  {'id': '9r546z1r8viw'},
  {'id': '6a4b6fd14j3e'},
  {'id': 'nv24kd71js89'},
  {'id': 'antw6fugi5dy'},
  {'id': 'bs4vly4k3y0h'},
  {'id': '7zmes569loz4'},
  {'id': 'wzfdvbqlvakr'},
  {'id': '8a74j7ey0vb1'},
  {'id': 'bg4ix9cj6r4c'},
  {'id': 'lbbrgk18nqwr'},
  {'id': 'rf7miixxsjm4'},
  {'id': 'csh9ul00eq73'},
  {'id': 'zjdfy1qp75ij'},
  {'id': 'wio5cvuwjr1i'},
  {'id': 'xvcke9j3aoeg'},
  {'id': 'j5jkcl3ie4gm'},
  {'id': 'vphptzwkdiiq'},
  {'id': 'k4lkdyu8w87i'},
  {'id': 'n9pzjh1yx1y3'},
  {'id': 'qgj9z0a1b767'},
  {'id': 'ibth6an7h3lg'},
  {'id': 'i3o169jthvi5'},
  {'id': 'dt0ab22dpmst'},
  {'id': 'fprpv52bk9mf'},
  {'id': 'wconsh0xx

In [133]:
# question = "What does an abyssal chicken have damage resistance to?"
# question = "How do I summon an abyssal chicken?"
# question = "What attacks does an abyssal chicken have?"
question = "What do Abyssal Chickens eat?"
# question = "How does sunlight effect kobolds?"

results = query_starpoint(question, use_descriptor=True, use_hyde=False)

for result in results:
    print('')
    print(result['monster_name'])
    print(result.get('trait_name', 'None'))
    print(result['information_description'])
    print(result['text'])
    print('')

subject:
['Abyssal Chickens', ' food']
information descriptor:
Information about the diet of Abyssal Chickens.
collection_name:
dnd-descriptions
[0.00795720238238573, -0.02581181190907955, 0.00033912094659172, 0.07807400077581406, -0.07644201815128326, -0.05719142407178879, -0.008391845040023327, 0.004136561416089535, 0.026090288534760475, -0.060537777841091156, -0.02716582454741001, -0.05096441134810448, -0.10687608271837234, 0.028445737436413765, -0.10351800173521042, 0.020280523225665092, 0.08664656430482864, -0.02040203846991062, -0.03651721775531769, -0.020258478820323944, 0.06244051828980446, 0.014899746514856815, 0.055752504616975784, -0.015992579981684685, -0.04286559671163559, -0.024683907628059387, -0.007960991002619267, 0.029746506363153458, -0.030396824702620506, -0.06241687387228012, -0.00816733855754137, -0.015689382329583168, 0.10891719162464142, -0.028428267687559128, -0.0004621406551450491, 0.010307678952813148, 0.07986316829919815, -0.07852857559919357, 0.038386266678