In [26]:
# Install (or upgrade) the third-party packages imported later in this
# notebook: sentence_transformers and starpoint.
%pip install -U sentence-transformers starpoint

Note: you may need to restart the kernel to use updated packages.


In [None]:
!unzip data.zip -d data/

In [27]:
# Utility function that flattens the nested "entries" structure of any given
# piece of data. This pattern is used often in 5e tools.
def flatten_entries(data):
    """Recursively collect every string reachable through lists and "entries" keys.

    Args:
        data: A string, a list, a dict, or any other value.

    Returns:
        A flat list of strings, in encounter order:
        - a string yields a one-element list containing it;
        - a list yields the concatenation of its flattened items;
        - a dict yields the flattened value of its "entries" key, if present;
        - anything else (including a dict without "entries") yields [].
    """
    if isinstance(data, str):
        return [data]

    if isinstance(data, list):
        # Flatten each item and chain the results together in order.
        return [text for item in data for text in flatten_entries(item)]

    if isinstance(data, dict) and "entries" in data:
        # Only the "entries" value of a dict is descended into.
        return flatten_entries(data["entries"])

    return []

In [28]:
import os
import json

# List the core bestiary data files. Only names starting with "bestiary" are
# kept, which leaves out the "fluff-bestiary*" description files that are
# loaded separately.
base_bestiary_files = [
    file_name
    for file_name in os.listdir('data/bestiary')
    if file_name.startswith('bestiary')
]

# Monster descriptions ("fluff") live in separate files from the core data,
# so everything is joined into one big dictionary keyed on
# "<name>-<source>" (monster names alone are sometimes duplicated).
monsters = {}

for base_file in base_bestiary_files:
    # JSON files are UTF-8; be explicit rather than relying on the
    # platform's default text encoding.
    with open(os.path.join('data/bestiary', base_file), 'r', encoding='utf-8') as f:
        monster_data = json.load(f)['monster']

    for monster in monster_data:
        key = monster['name'] + '-' + monster['source']
        # The membership test must be against the accumulated `monsters`
        # dict. Testing against `monster_data` (a list of dicts) can never
        # match a string key, so the merge branch would be unreachable.
        if key in monsters:
            # Same name + source seen before: merge, later values win.
            monsters[key] = {**monsters[key], **monster}
        else:
            monsters[key] = monster


In [29]:
# Attach the monster descriptions ("fluff") to the monsters already loaded
# into the `monsters` dictionary.
fluff_files = [
    file_name
    for file_name in os.listdir('data/bestiary')
    if file_name.startswith('fluff-bestiary')
]

for fluff_file in fluff_files:
    # JSON files are UTF-8; be explicit rather than relying on the
    # platform's default text encoding.
    with open(os.path.join('data/bestiary', fluff_file), 'r', encoding='utf-8') as f:
        fluff_data = json.load(f)['monsterFluff']

    for fluff in fluff_data:
        # Same "<name>-<source>" key format used when loading the monsters.
        key = fluff['name'] + '-' + fluff['source']
        # Only fluff that carries text and matches a known monster is kept;
        # its nested entries are flattened into a list of strings.
        if 'entries' in fluff and key in monsters:
            monsters[key]['descriptions'] = flatten_entries(fluff['entries'])


In [30]:
# Pretty-print the first 10 monsters as indented JSON.
first_monsters = list(monsters.items())[:10]
for key, value in first_monsters:
    print(json.dumps(value, indent=4))


{
    "name": "Fume Drake",
    "source": "DoSI",
    "page": 41,
    "size": [
        "S"
    ],
    "type": "elemental",
    "alignment": [
        "N"
    ],
    "alignmentPrefix": "typically ",
    "ac": [
        12
    ],
    "hp": {
        "average": 22,
        "formula": "5d6 + 5"
    },
    "speed": {
        "walk": 30,
        "fly": 30
    },
    "str": 6,
    "dex": 14,
    "con": 12,
    "int": 6,
    "wis": 10,
    "cha": 11,
    "senses": [
        "darkvision 60 ft."
    ],
    "passive": 10,
    "immune": [
        "fire",
        "poison"
    ],
    "conditionImmune": [
        "poisoned"
    ],
    "languages": [
        "Draconic",
        "Ignan"
    ],
    "cr": "1/4",
    "trait": [
        {
            "name": "Death Burst",
            "entries": [
                "When the fume drake dies, it explodes in a cloud of noxious fumes. Each creature within 5 feet of the fume drake must succeed on a {@dc 11} Constitution saving throw or take 4 ({@damage 1d8}) po

In [34]:
from collections import defaultdict

# Maps each monster key to a dict of {field name: text built for that field};
# filled in by add_text_to_embed below.
monster_texts = defaultdict(dict)

def add_text_to_embed(key, field, monster_entry):
    """Build the text for one field of a monster and store it for embedding.

    The text is written to the module-level ``monster_texts[key][field]``.
    It is made of newline-terminated pieces, in order:
    - each plain-string item of the field, as is;
    - for each dict item: its "name" (when not None), followed by one piece
      per element of its "entries", where each element is flattened with
      ``flatten_entries`` and its strings joined by newlines.

    Args:
        key: Monster key used to index ``monster_texts``.
        field: Name of the field to read from ``monster_entry``.
        monster_entry: Monster dictionary holding the field.

    Returns:
        None. If the field is missing or None, nothing is stored. Items that
        are neither strings nor dicts, and dict items without "entries",
        contribute no entry text instead of raising.
    """
    items = monster_entry.get(field)
    if items is None:
        # Nothing to embed for this field.
        return

    pieces = []
    for item in items:
        if isinstance(item, str):
            pieces.append(item)
            continue
        if not isinstance(item, dict):
            # Unknown item shape: skip it rather than fail on `.get`.
            continue
        if item.get("name") is not None:
            pieces.append(item["name"])
        for entry in item.get("entries") or []:
            pieces.append("\n".join(flatten_entries(entry)))

    # Every piece is terminated by a newline, matching the original layout.
    monster_texts[key][field] = "".join(piece + "\n" for piece in pieces)

# Build the embeddable text for every listed field that a monster has set.
for key, monster in monsters.items():
    for field in ("trait", "action", "legendary", "bonus", "descriptions"):
        if monster.get(field) is None:
            # Field absent or explicitly None: nothing to collect.
            continue
        add_text_to_embed(key, field, monster)

# Preview the collected text for the first 10 monsters.
first_texts = list(monster_texts.items())[:10]
for key, value in first_texts:
    print(key, value)

Fume Drake-DoSI {'trait': "Death Burst\nWhen the fume drake dies, it explodes in a cloud of noxious fumes. Each creature within 5 feet of the fume drake must succeed on a {@dc 11} Constitution saving throw or take 4 ({@damage 1d8}) poison damage.\nUnusual Nature\nThe fume drake doesn't require food, drink, or sleep.\n", 'action': 'Bite\n{@atk mw} {@hit 4} to hit, reach 5 ft., one target. {@h}4 ({@damage 1d4 + 2}) fire damage.\nScalding Breath {@recharge}\nThe fume drake exhales a 15-foot cone of scalding steam. Each creature in that area must make a {@dc 11} Dexterity saving throw, taking 4 ({@damage 1d8}) fire damage on a failed save, or half as much damage on a successful one.\n', 'descriptions': 'Fume drakes are mischievous creatures that arise from the lingering magical energy of a dead dragon. They resemble small, legless dragons formed from clouds of greenish smoke. They delight in causing pain and confusion in other creatures.\n'}
Kobold Tinkerer-DoSI {'trait': "Inquiring Mind (

In [42]:
from sentence_transformers import SentenceTransformer

# Maps each monster key to a dict of {field name: embedding of that field's text}.
embeddings_data = defaultdict(dict)

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Encode every collected text, one call per (monster, field) pair.
for key, texts in monster_texts.items():
    for text_field in texts:
        embeddings_data[key][text_field] = model.encode(texts[text_field])

In [None]:
import copy
from starpoint.db import Client

COLLECTION_NAME = 'dnd'

# Credentials must not be committed with the notebook: read the API key from
# the environment instead. The key that was previously hardcoded here should
# be treated as leaked and rotated.
API_KEY = os.environ.get('STARPOINT_API_KEY')
if not API_KEY:
    raise RuntimeError(
        'Set the STARPOINT_API_KEY environment variable before running this cell.'
    )

# One document per (monster, text field): the monster's full data as metadata
# plus the embedding of that field's text.
documents_to_upload = []

for key, embeddings in embeddings_data.items():
    metadata = monsters[key]
    for text_field, embedding in embeddings.items():
        # Deep copy so each document owns its metadata and tagging it with
        # 'embedding_source' does not modify the entry in `monsters`.
        new_metadata = copy.deepcopy(metadata)
        new_metadata['embedding_source'] = text_field
        documents_to_upload.append({
            'metadata': new_metadata,
            # Convert the embedding to a plain list for the upload payload.
            'embedding': embedding.tolist()
        })

client = Client(api_key=API_KEY)
client.insert(documents=documents_to_upload, collection_name=COLLECTION_NAME)


{'collection_id': '7eda4d2c-1f99-44ff-bc0f-5f2d49ce7397', 'documents': []}