In [2]:
# Extract the data archive into data/ (later cells read from data/bestiary).
!unzip data.zip -d data/

In [5]:
# Utility for collapsing nested "entries" structures into a flat list of
# strings; this nesting pattern is used often in 5e tools data.
def flatten_entries(data):
    """Recursively collect every string reachable through lists and "entries" keys.

    Args:
        data: A string, a list, a dict, or any other value.

    Returns:
        A flat list of strings in traversal order:
          * a string yields a one-element list containing itself;
          * a list yields the concatenation of its flattened items;
          * a dict yields the flattened value of its "entries" key, or an
            empty list when it has no such key (all other keys are ignored);
          * anything else yields an empty list.
    """
    if isinstance(data, str):
        return [data]

    if isinstance(data, list):
        # Flatten each element and splice the results together in order.
        return [text for item in data for text in flatten_entries(item)]

    if isinstance(data, dict) and "entries" in data:
        # Only the "entries" payload of a dict is descended into.
        return flatten_entries(data["entries"])

    # Dicts without "entries" and unsupported types contribute nothing.
    return []

In [6]:
import os
import json

# List the core bestiary data files. Only names starting with "bestiary" are
# kept, which skips the fluff files ("fluff-bestiary...") and any other files
# in the directory. The list is sorted because os.listdir returns entries in
# arbitrary order and the merge below depends on load order.
base_bestiary_files = sorted(
    name for name in os.listdir('data/bestiary') if name.startswith('bestiary')
)

# Every monster from every file, keyed on "<name>|<source>" because the same
# monster name can appear under more than one source.
monsters = {}

for base_file in base_bestiary_files:
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which is not UTF-8 on every system.
    with open('data/bestiary/' + base_file, 'r', encoding='utf-8') as f:
        # Each file is a JSON object whose 'monster' key holds the list of
        # monster records.
        monster_data = json.load(f)['monster']

    for monster in monster_data:
        key = monster['name'] + '|' + monster['source']
        # Check the accumulated dictionary (not the list currently being
        # iterated) to detect a key we have already stored.
        if key in monsters:
            # Same name and source seen before: merge, with the newer
            # record's fields taking precedence.
            monsters[key] = {**monsters[key], **monster}
        else:
            monsters[key] = monster


In [7]:
# Attach the descriptive "fluff" text to the monsters loaded earlier.
# Fluff lives in separate "fluff-bestiary*" files; each record is matched to
# a monster through the same "<name>|<source>" key. Files are visited in
# sorted order so the result does not depend on os.listdir's arbitrary order.
fluff_files = sorted(
    name for name in os.listdir('data/bestiary') if name.startswith('fluff-bestiary')
)

for fluff_file in fluff_files:
    # Read as UTF-8 explicitly instead of relying on the platform default
    # encoding, which is not UTF-8 on every system.
    with open('data/bestiary/' + fluff_file, 'r', encoding='utf-8') as f:
        fluff_data = json.load(f)['monsterFluff']

    for fluff in fluff_data:
        key = fluff['name'] + '|' + fluff['source']
        # Only fluff that actually carries text ("entries") and belongs to a
        # known monster is used; anything else is skipped.
        if 'entries' in fluff and key in monsters:
            # Store the flattened text under 'descriptions', replacing any
            # value set by an earlier fluff record for the same key.
            monsters[key]['descriptions'] = flatten_entries(fluff['entries'])


In [8]:
# Sanity check: pretty-print the first 10 merged monster records
# (dictionary insertion order) as indented JSON.
preview = list(monsters.items())[:10]
for _, record in preview:
    print(json.dumps(record, indent=4))

In [9]:
from collections import defaultdict

# Accumulates one record per (monster, field) pair; filled by add_text_to_embed.
monster_texts = []

def add_text_to_embed(key, field, monster_entry):
    """Render one field of a monster as plain text and append it to monster_texts.

    Args:
        key: Monster key in the form "<name>|<source>". It is split on the
            last "|" so a name that itself contains "|" is still handled.
        field: Name of the field to render (e.g. "trait", "action").
        monster_entry: The monster dict. ``monster_entry[field]`` is expected
            to be a list whose items are either plain strings or dicts with
            an optional "name" and an optional "entries" list.

    Side effects:
        Appends ``{"monster_name", "monster_source", "field", "text"}`` to the
        module-level ``monster_texts`` list. Nothing is appended when the
        field is missing or None.

    The text is one line per string item; for dict items, the name (if any)
    on its own line followed by one newline-terminated chunk per entry, each
    chunk being the entry's flattened strings joined by newlines.
    """
    items = monster_entry.get(field)
    if items is None:
        # Nothing to render for this monster/field.
        return

    chunks = []
    for trait in items:
        if isinstance(trait, str):
            chunks.append(trait)
            continue
        if trait.get("name") is not None:
            chunks.append(trait["name"])
        # A dict without "entries" contributes only its name (if any).
        for entry in trait.get("entries", []):
            chunks.append("\n".join(flatten_entries(entry)))

    # Every chunk is newline-terminated.
    text = "".join(chunk + "\n" for chunk in chunks)

    # The source is the part after the last "|"; the rest is the name.
    monster_name, monster_source = key.rsplit("|", 1)
    monster_texts.append({
        "monster_name": monster_name,
        "monster_source": monster_source,
        "field": field,
        "text": text
    })

# Build a text record for every monster field that is present and not None.
for key, monster in monsters.items():
    for field in ("trait", "action", "legendary", "bonus", "descriptions"):
        if monster.get(field) is None:
            continue
        add_text_to_embed(key, field, monster)

# Show the first 10 generated records as a quick check.
for record in monster_texts[:10]:
    print(record)

Dump the monster text to a JSON file so it can be processed separately.

In [11]:
# Serialize the collected text records to a JSON file in the working directory.
output_path = "./monster_text.json"
with open(output_path, "w") as out_file:
    json.dump(monster_texts, out_file)