In [1]:
import pandas as pd
import simplejson
from math import isnan
import re

In [11]:
# Load the bestiary export into a DataFrame. The path is relative, so the notebook
# must be run from the directory that contains the "pathfinder" folder.
monsters = pd.read_csv("pathfinder/monster_bestiary_full - Updated 27Jul2015.csv")

In [12]:
# Drop every monster whose Source mentions "Tome of Horrors".
# str.contains(..., na=False) treats a missing Source as "no match", so such rows are
# kept instead of raising TypeError the way `"..." not in x` does on a float NaN.
# regex=False makes this a plain substring test.
# NOTE(review): the Source values listed near the end of this notebook include 'TOH1',
# which this substring does not match - confirm whether it should be excluded as well.
monsters = monsters[~monsters.Source.str.contains("Tome of Horrors", regex=False, na=False)]

In [40]:
# NOTE: this cell used to start with a bare `monsters.set_index("Name")`. Its result was
# never assigned, so it had no effect, and the overrides below rely on `Name` staying a
# regular column; the call has been dropped.


def _normalise_text(value):
    """Tidy one CSV cell; non-string values (numbers, NaN) are returned unchanged."""
    if not isinstance(value, str):
        return value
    value = re.sub(r'\s\s+', ' ', value)        # collapse runs of whitespace
    value = re.sub('–', '-', value)             # en dash -> ASCII hyphen
    value = re.sub(r',$', '', value)            # drop a trailing comma
    return re.sub(r'([+-])\s', r'\1', value)    # "+ 5" -> "+5"


# One element-wise pass instead of four. DataFrame.map is the current name for
# applymap; fall back to applymap on pandas versions that predate it.
_elementwise = monsters.map if hasattr(pd.DataFrame, 'map') else monsters.applymap
monsters = _elementwise(_normalise_text)

# Hand-written replacements for individual entries, applied after the normalisation
# above. Each tuple is (monster name, column, replacement text).
# - Purrodaemon: the crit range is written with an ASCII hyphen so the override agrees
#   with the en-dash normalisation applied to every other cell.
# - Pixie: spelled "flat-footed" like the other AC overrides so process_ac produces the
#   same key for every monster.
_MANUAL_FIXES = [
    ('Ancient Red Dragon', 'AC', '38, touch 5, flat-footed 38'),
    ('Fire Giant', 'AC', '24, touch 8, flat-footed 24'),
    ('Pixie', 'AC', '18, touch 17, flat-footed 12'),
    ('Aerial Servant', 'Melee', 'slam +25 (2d8+12 plus grab)'),
    ('Purrodaemon', 'Melee', '+2 wounding halberd +32/+27/+22/+17 (2d8+18/19-20/×3), bite +24 (1d8+5)'),
    ('Slurk', 'Ranged', 'slime squirt +4 ranged touch (slime)'),
    ('Flaming Skull', 'Melee', 'slam +1 (1d2 plus burn 1d6)'),
    ('Changeling', 'Melee', '2 claws +0 (1d4+1)'),
    ('Hungry Fog', 'Melee', 'touch +5 touch (6d6 negative energy)'),
]
for _monster_name, _column, _replacement in _MANUAL_FIXES:
    monsters.loc[monsters.Name == _monster_name, _column] = _replacement

# One dict per monster (column name -> value); missing values are still NaN here.
raw_dict = monsters.to_dict(orient='records')

In [35]:
def _drop_nones(node):
    """Recursively rebuild lists/dicts without their None entries; other values pass through."""
    if isinstance(node, list):
        return [_drop_nones(item) for item in node if item is not None]
    if isinstance(node, dict):
        return {key: _drop_nones(val) for key, val in node.items() if val is not None}
    return node


def clean_nones(messy_dict):
    """
    Recursively remove all None and NaN values from dictionaries and lists, and
    return the result as a new dictionary or list.

    The input is first round-tripped through simplejson with ignore_nan=True, which
    serialises NaN as JSON null (so it comes back as None). That round-trip is done
    once for the whole structure rather than again at every level of the recursion;
    everything returned by `loads` is already plain JSON data, so repeating it on
    sub-structures would not change anything.
    """
    cleanish = simplejson.loads(simplejson.dumps(messy_dict, ignore_nan=True))
    return _drop_nones(cleanish)

In [36]:
# Strip None/NaN entries from the per-monster records. Despite its name, monster_dict is
# a list: raw_dict comes from to_dict(orient='records'), i.e. one dict per monster.
monster_dict = clean_nones(raw_dict)

In [45]:
def process_attack_string(attack_string):
    """
    Parse a Melee/Ranged attack line into a list of individual attacks.

    Only the first alternative of an "... or ..." line is kept. Every remaining
    comma-separated entry becomes one or more dicts:

    * {'special': text} for entries mentioning 'swarm', 'horde' or 'see below';
    * otherwise {'name': str, 'hit': int, 'damage': str}, emitted once per attack:
      an entry with slash-separated bonuses ("+32/+27") yields one dict per bonus,
      and an entry with a leading count ("2 claws") yields that many dicts with the
      trailing plural "s" removed from the name.

    Raises ValueError (naming the offending text) when an entry does not have the
    "<name> <bonus> [touch ](<damage>)" shape.
    """
    # Re-insert spaces the source data tends to omit ("2claws", "+5(1d6)", "bite+5") so
    # the parsing regex can rely on whitespace.
    # NOTE(review): the digit/letter rule also applies inside the damage text, so dice
    # notation such as "2d8" comes out as "2 d8".
    text = re.sub(r'([0-9])([A-Za-z])', r'\1 \2', attack_string)
    text = re.sub(r'([0-9])([(])', r'\1 \2', text)
    text = re.sub(r'([a-z])([+-])', r'\1 \2', text)

    # Keep the first alternative only. `flags=` must be passed by keyword: the third
    # positional argument of re.split is maxsplit, so a positional re.IGNORECASE was
    # silently treated as maxsplit=2 and the match stayed case-sensitive.
    text = re.split(r',? or (?![^()]*\))', text, flags=re.IGNORECASE)[0]

    attack_list = []
    # Split on commas that are not inside parentheses.
    for raw_attack in re.split(r',\s*(?![^()]*\))', text):
        attack = re.sub(r'(melee )|(ranged )', '', raw_attack)

        if 'swarm' in attack or 'horde' in attack or 'see below' in attack:
            attack_list.append({'special': attack})
            continue

        # Detect and remove the leading count on the same (stripped) text, so the two
        # steps cannot disagree when the entry starts with whitespace.
        attack = attack.strip()
        count_match = re.match(r'[0-9]+', attack)
        if count_match:
            count = int(count_match[0])
            attack = attack[count_match.end():].strip()
        else:
            count = 1

        # The bonus class lists '-' first: written as [+-/0-9] it is the range '+'..'/',
        # which also accepts ',' and '.'.
        parsed = re.search(r'^(?:\(rage\))?(.+)\s([-+/0-9]+)\s(touch\s)?\((.+)\)', attack)
        if parsed is None:
            raise ValueError('Unrecognised attack entry: %r' % attack)
        attack_name, attack_bonus, _touch, attack_damage = parsed.groups()
        attack_name = attack_name.strip()

        if '/' in attack_bonus:
            # Iterative attacks: one entry per listed bonus.
            hits = [int(bonus) for bonus in attack_bonus.split('/')]
        else:
            # "2 claws" -> two "claw" attacks; only drop the last letter if it is an 's'.
            if count > 1 and attack_name.endswith('s'):
                attack_name = attack_name[:-1]
            hits = [int(attack_bonus)] * count

        attack_list.extend(
            {'name': attack_name, 'hit': hit, 'damage': attack_damage} for hit in hits
        )
    return attack_list
    
def process_attacks(monster):
    """
    Parse whichever of the 'Melee' and 'Ranged' fields the monster has.

    Returns a dict with a 'melee' and/or 'ranged' key, each holding the list
    produced by process_attack_string; a field that is absent gets no key.
    """
    return {
        field.lower(): process_attack_string(monster[field])
        for field in ('Melee', 'Ranged')
        if field in monster.keys()
    }
    
def process_feats(monster):
    """
    Return the monster's feats as a list of strings ([] when it has no 'Feats' field).

    The split ignores commas inside parentheses, so a feat such as
    "Weapon Focus (bite, claw)" stays in one piece. This is the same paren-aware
    split the skill and ability-score parsers use.
    """
    if 'Feats' not in monster:
        return []
    return re.split(r',\s*(?![^()]*\))', monster['Feats'])

def process_skills(monster):
    """
    Parse the 'Skills' field into a list of single-entry dicts, [{skill_name: bonus}, ...].

    Returns [] when the field is absent or is the placeholder '(see below)'.
    The skill name is everything before the first '+' or '-'; the bonus is the first
    (optionally negative) integer found in the entry.
    """
    skills = []
    if 'Skills' not in monster or monster['Skills'] == '(see below)':
        return skills

    # Split on commas that are not inside parentheses, so "Knowledge (arcana, planes) +7"
    # stays one entry. Raw strings keep the regex escapes from being read as (invalid)
    # string escapes.
    for skill_string in re.split(r',\s*(?![^()]*\))', monster['Skills']):
        skill_name = re.search(r'^[^-+]+', skill_string)[0].strip()
        skill_bonus = int(re.search(r'\+?(-?[0-9]+)', skill_string).groups()[0])
        skills.append({skill_name: skill_bonus})
    return skills

def process_attributes(monster):
    """
    Parse the 'AbilityScores' field into {abbreviation: score}.

    Abbreviations are lower-cased three-letter names ('str', 'dex', ...). Scores are
    kept as the text that follows the abbreviation, not converted to numbers.
    """
    attributes = {}
    # Split on commas that are not inside parentheses.
    for ab in re.split(r',\s*(?![^()]*\))', monster['AbilityScores']):
        # The {3} belongs inside the group: ([A-Za-z]){3} repeats the group and keeps
        # only its last repetition, which turned 'str' into the key 'r'.
        name, score = re.search(r'([A-Za-z]{3})\s(.*)', ab.strip().lower()).groups()
        attributes[name] = score
    return attributes

def process_subtypes(monster):
    """
    Return the monster's subtypes as a list of lower-case strings.

    The first and last characters of the 'SubType' field are dropped before
    splitting on ', '. A monster without the field yields an empty list.
    """
    if 'SubType' not in monster.keys():
        return []
    inner = monster['SubType'][1:-1]
    return inner.lower().split(', ')

def process_movement(monster):
    """
    Parse the 'Speed' field into {movement type: speed}.

    An entry with no type in front of its number is stored under 'land'. Speeds are
    the digit strings as found (None when an entry contains no number).
    """
    movement = {}
    for move in monster['Speed'].split(', '):
        move_type, move_speed = re.search(r'([^0-9]*)\s?([0-9]+)?', move).groups()
        # [^0-9]* is greedy and takes the space before the number with it, so trim the
        # type; otherwise the keys come out as 'fly ' rather than 'fly'.
        movement[move_type.strip() or 'land'] = move_speed
    return movement

def process_saves(monster):
    """Collect the three saving throws ('Fort', 'Ref', 'Will') under spelled-out keys."""
    save_fields = (('fortitude', 'Fort'), ('reflex', 'Ref'), ('will', 'Will'))
    return {save_name: monster[field] for save_name, field in save_fields}

def process_ac(monster):
    """
    Parse the armour-class fields into one dict of strings.

    From 'AC' (e.g. "18, touch 17, flat-footed 12") come the totals: the unlabelled
    first number is stored under 'base', the others under their label. When the
    monster has an 'AC_Mods' field (e.g. "(+5 Dex, +1 dodge)"), each modifier is added
    as lower-cased type -> signed value (e.g. 'dex': '+5').
    """
    ac = {}
    for ac_ele in monster['AC'].split(', '):
        ac_type, ac_value = re.search(r'([^0-9]*)\s?([0-9]+)', ac_ele).groups()
        # [^0-9]* is greedy and takes the space before the number with it; trim it so
        # the keys are 'touch' / 'flat-footed' rather than 'touch ' / 'flat-footed '.
        ac[ac_type.strip() or 'base'] = ac_value

    if 'AC_Mods' in monster:
        # Separate a number from a directly following letter, then lower-case.
        mods = re.sub(r'([0-9])([A-Za-z])', r'\1 \2', monster['AC_Mods']).lower()
        # Take the semicolon-free text that runs from a '(' or ',' up to a ')'.
        mods = re.search(r'[(,]([^;]*)\)', mods).groups()[0]
        for mod in re.sub(r'[()]', '', mods).split(', '):
            ac_value, ac_type = re.search(r'([+-][0-9]+)\s([A-Za-z]+)', mod).groups()
            ac[ac_type] = ac_value
    return ac

def process_senses(monster):
    """
    Return the monster's senses as a list of strings, without the Perception entry.

    The 'Senses' field is split on "; " and ", ". A monster without the field yields
    an empty list, matching how the other optional fields are handled.
    """
    if 'Senses' not in monster:
        return []
    # Read from the `monster` argument. The previous body indexed a global `m`, which is
    # undefined on a fresh kernel and otherwise returns one stale monster's senses for
    # every call.
    return [sense for sense in re.split('[;,] ', monster['Senses']) if 'Perception' not in sense]

def process_monster(entry):
    """
    Turn one flat CSV record into the nested monster structure.

    The result has the keys 'characteristics', 'combat', 'other', 'name' and
    'skills'. Plain fields are copied from `entry`; composite fields are parsed by
    the process_* helpers. 'HD' is stored without its first and last character.
    """
    name = entry['Name']
    skills = process_skills(entry)

    combat = {
        'space': entry['Space'],
        'reach': entry['Reach'],
        'initiative': entry['Init'],
        'senses': process_senses(entry),
        'hp': entry['HP'],
        'AC': process_ac(entry),
        'saves': process_saves(entry),
        'movement': process_movement(entry),
        'attacks': process_attacks(entry),
    }

    characteristics = {
        'size': entry['Size'],
        'CR': entry['CR'],
        'XP': entry['XP'],
        'alignment': entry['Alignment'],
        'type': entry['Type'],
        'subtypes': process_subtypes(entry),
        'environment': entry['Environment'],
        'appearance': entry['Description_Visual'],
        'description': entry['Description'],
    }

    other = {
        'attributes': process_attributes(entry),
        'HD': entry['HD'][1:-1],
        'feats': process_feats(entry),
    }

    return {
        'characteristics': characteristics,
        'combat': combat,
        'other': other,
        'name': name,
        'skills': skills,
    }



In [48]:
# Build the nested structure for every cleaned record, in the records' order.
processed_monsters = list(map(process_monster, monster_dict))
                

In [49]:
# Preview the first few parsed monsters instead of echoing the entire list into the
# notebook output.
processed_monsters[:3]

[{'characteristics': {'size': 'Medium',
   'CR': '1/2',
   'XP': 200,
   'alignment': 'NG',
   'type': 'outsider',
   'subtypes': ['native'],
   'environment': 'any land',
   'appearance': 'This supernaturally beautiful woman looks human, yet emanates a strange sense of calm and benevolence.',
   'description': "Aasimars are humans with a signif icant amount of celestial or other good outsider blood in their ancestry. Aasimars are not always good, but it is a natural tendency for them, and they gravitate to good faiths or organizations associated with celestials. Aasimar heritage can hide for generations, only to appear suddenly in the child of two apparently human parents. Most societies interpret aasimar births as good omens. Aasimars look mostly human except for some minor physical trait that reveals their unusual heritage. Typical aasimar features are hair that shines like metal, unusual eye or skin color, or even glowing golden halos. Aasimar Characters Aasimars are defined by cla

In [382]:
# Scratch check of an attack-parsing pattern against one sample string. In this variant
# the name group excludes '+' and '-', and the captured name keeps its trailing space
# (see the output below). Note that [+-/0-9] is a range from '+' to '/', so it also
# accepts ',' and '.'.
re.search('^([^+-]+)?\s?([+-/0-9]+)\s(touch\s)?\((.+)\)', 'tail sting +15 (2d6+2 plus poison)').groups()

('tail sting ', '+15', None, '2d6+2 plus poison')

In [260]:
def process_ac(monster):
    """
    Parse the armour-class fields into one dict of strings.

    NOTE: this cell redefines process_ac, which an earlier cell also defines. The later
    definition wins when the notebook is run top to bottom, so this copy carries the
    complete parsing logic (optional 'AC_Mods', no debug printing) rather than the
    older debugging variant.

    From 'AC' (e.g. "18, touch 17, flat-footed 12") come the totals: the unlabelled
    first number is stored under 'base', the others under their label. When the
    monster has an 'AC_Mods' field (e.g. "(+5 Dex, +1 dodge)"), each modifier is added
    as lower-cased type -> signed value (e.g. 'dex': '+5').
    """
    ac = {}
    for ac_ele in monster['AC'].split(', '):
        ac_type, ac_value = re.search(r'([^0-9]*)\s?([0-9]+)', ac_ele).groups()
        # [^0-9]* is greedy and takes the space before the number with it; trim it so
        # the keys are 'touch' / 'flat-footed' rather than 'touch ' / 'flat-footed '.
        ac[ac_type.strip() or 'base'] = ac_value

    if 'AC_Mods' in monster:
        # Separate a number from a directly following letter, then lower-case.
        mods = re.sub(r'([0-9])([A-Za-z])', r'\1 \2', monster['AC_Mods']).lower()
        # Take the semicolon-free text that runs from a '(' or ',' up to a ')'.
        mods = re.search(r'[(,]([^;]*)\)', mods).groups()[0]
        for mod in re.sub(r'[()]', '', mods).split(', '):
            ac_value, ac_type = re.search(r'([+-][0-9]+)\s([A-Za-z]+)', mod).groups()
            ac[ac_type] = ac_value
    return ac

In [33]:
# Scratch lookup of one record's Source. `m` is not assigned at top level in any cell
# shown here, so this depends on a variable left over in the kernel.
m['Source']

'PFRPG Bestiary 3'

In [450]:
def process_attack_string(attack_string):
    """
    Parse a Melee/Ranged attack line into a list of individual attacks.

    NOTE: this cell redefines process_attack_string, which an earlier cell also
    defines. The later definition wins when the notebook is run top to bottom, so this
    copy carries the complete parsing logic (including the letter/sign spacing rule,
    and without the debug print) rather than the older debugging variant.

    Only the first alternative of an "... or ..." line is kept. Every remaining
    comma-separated entry becomes one or more dicts:

    * {'special': text} for entries mentioning 'swarm', 'horde' or 'see below';
    * otherwise {'name': str, 'hit': int, 'damage': str}, emitted once per attack:
      an entry with slash-separated bonuses ("+32/+27") yields one dict per bonus,
      and an entry with a leading count ("2 claws") yields that many dicts with the
      trailing plural "s" removed from the name.

    Raises ValueError (naming the offending text) when an entry does not have the
    "<name> <bonus> [touch ](<damage>)" shape.
    """
    # Re-insert spaces the source data tends to omit ("2claws", "+5(1d6)", "bite+5") so
    # the parsing regex can rely on whitespace.
    # NOTE(review): the digit/letter rule also applies inside the damage text, so dice
    # notation such as "2d8" comes out as "2 d8".
    text = re.sub(r'([0-9])([A-Za-z])', r'\1 \2', attack_string)
    text = re.sub(r'([0-9])([(])', r'\1 \2', text)
    text = re.sub(r'([a-z])([+-])', r'\1 \2', text)

    # Keep the first alternative only. `flags=` must be passed by keyword: the third
    # positional argument of re.split is maxsplit, so a positional re.IGNORECASE was
    # silently treated as maxsplit=2 and the match stayed case-sensitive.
    text = re.split(r',? or (?![^()]*\))', text, flags=re.IGNORECASE)[0]

    attack_list = []
    # Split on commas that are not inside parentheses.
    for raw_attack in re.split(r',\s*(?![^()]*\))', text):
        attack = re.sub(r'(melee )|(ranged )', '', raw_attack)

        if 'swarm' in attack or 'horde' in attack or 'see below' in attack:
            attack_list.append({'special': attack})
            continue

        # Detect and remove the leading count on the same (stripped) text, so the two
        # steps cannot disagree when the entry starts with whitespace.
        attack = attack.strip()
        count_match = re.match(r'[0-9]+', attack)
        if count_match:
            count = int(count_match[0])
            attack = attack[count_match.end():].strip()
        else:
            count = 1

        # The bonus class lists '-' first: written as [+-/0-9] it is the range '+'..'/',
        # which also accepts ',' and '.'.
        parsed = re.search(r'^(?:\(rage\))?(.+)\s([-+/0-9]+)\s(touch\s)?\((.+)\)', attack)
        if parsed is None:
            raise ValueError('Unrecognised attack entry: %r' % attack)
        attack_name, attack_bonus, _touch, attack_damage = parsed.groups()
        attack_name = attack_name.strip()

        if '/' in attack_bonus:
            # Iterative attacks: one entry per listed bonus.
            hits = [int(bonus) for bonus in attack_bonus.split('/')]
        else:
            # "2 claws" -> two "claw" attacks; only drop the last letter if it is an 's'.
            if count > 1 and attack_name.endswith('s'):
                attack_name = attack_name[:-1]
            hits = [int(attack_bonus)] * count

        attack_list.extend(
            {'name': attack_name, 'hit': hit, 'damage': attack_damage} for hit in hits
        )
    return attack_list

In [451]:
# Scratch check of the AC-modifier regex: prints "<type> <value>" for each element of
# `ac_strings`. That name is only a local inside process_ac in the cells shown here, so
# this loop depends on a top-level variable left over in the kernel.
for ac_ele in ac_strings:
    ac_value, ac_type,  = re.search("([+-][0-9]+)\s([A-Za-z]+)", ac_ele).groups()
    # [A-Za-z]+ needs at least one letter, so ac_type cannot be '' when the search matches.
    if ac_type == '':
        ac_type = 'base'
    print(ac_type, ac_value)

dex +5
dodge +1
natural +1
size +1


In [452]:
# Display the leftover top-level `ac_strings` list used by the scratch loop above.
ac_strings

['+5 dex', '+1 dodge', '+1 natural', '+1 size']

In [453]:
# Look up the Source of the 'Biclops' row. The recorded output shows a
# 'Tome of Horrors Complete' entry, which the Tome of Horrors filter near the top of
# the notebook would drop - presumably this ran against an unfiltered `monsters`
# (the execution counts are out of order); confirm before relying on it.
monsters.loc[monsters.Name == 'Biclops'].Source

1246    Tome of Horrors Complete
Name: Source, dtype: object

In [454]:
# List the distinct Source values present in `monsters`.
monsters.Source.unique()

array(['PFRPG Bestiary', 'AP 36', 'AP 25', 'AP 26', 'AP 27',
       'Book of the Damned V1', 'AP 28', 'AP 29', 'AP 30', 'AP 31',
       'AP 32', 'City of Golden Death', 'AP 33', 'Heart of the Jungle',
       'AP 34', 'AP 35', 'd20pfsrd', 'AP 37', 'AP 38',
       'The Witchwar Legacy', 'TOH1', 'AP 39', 'Misfit Monsters', 'AP 40',
       'Tome of Horrors Revised', 'Godsmouth Heresy',
       'Book of the Damned V2', 'PFRPG Bestiary 2',
       'Lost Cities Of Golarion', 'AP 41', 'AP 42', 'AP 43',
       'Inner Sea World Guide', 'AP 44', 'Tomb Of The Iron Medusa',
       'AP 45', 'AP 46', 'Academy Of Secrets', 'Undead Revisited',
       'AP 47', 'The Harrowing', 'Ultimate Magic', 'AP 48', 'AP 49',
       'Tome of Horrors Complete', 'AP 50', 'The Feast Of Ravenmoor',
       'Lands Of The Linnorm Kings', 'AP 51', 'AP 52',
       'Horsemen Of The Apocalypse', 'Core Race', 'PFRPG Bestiary 3',
       'AP 53', 'The Ruby Phoenix Tournament', 'AP 54', 'Distant Worlds',
       'AP 55', 'Isles Of The