In [1]:
import pandas
from bs4 import BeautifulSoup
import splinter
import re
import os

In [2]:
# Creature pages to import. Each entry is the slug that follows "creatures:"
# in the page URL; the full URLs are assembled below in this same order.
_creature_slugs = (
    'spider',
    'blood-hawk',
    'badger',
    'bat',
    'cat',
    'crab',
    'deer',
    'eagle',
    'fox',
    'frog',
    'goat',
    'hare',
    'hawk',
    'hyena',
    'jackal',
    'lizard',
    'octopus',
    'owl',
    'pig',
    'rat',
    'sheep',
    'vulture',
    'camel',
    'dolphin-legacy',
    'giant-crab',
    'giant-rat',
    'giant-weasel',
    'mastiff',
    'mule',
    'poisonous-snake',
    'pony',
    'boar',
    'constrictor-snake',
    'draft-horse',
    'elk',
    'giant-badger',
    'giant-bat',
    'giant-lizard',
    'giant-owl',
    'giant-poisonous-snake',
    'giant-wolf-spider',
    'panther',
    'riding-horse',
    'wolf',
    'ape',
    'black-bear',
    'crocodile',
    'giant-goat',
    'reef-shark',
    'warhorse',
    'brown-bear',
    'dire-wolf',
    'giant-eagle',
    'giant-hyena',
    'giant-octopus',
    'giant-spider',
    'giant-toad',
    'giant-vulture',
    'lion',
    'tiger',
    'aurochs-legacy',
    'cave-bear',
    'giant-boar',
    'giant-constrictor-snake',
    'giant-crayfish',
    'giant-elk',
    'hunter-shark',
    'polar-bear',
    'rhinoceros',
    'saber-toothed-tiger',
)
_creature_url_prefix = 'http://dndroll.wikidot.com/creatures:'

# Full page URLs, one per creature.
monsters_to_import = [_creature_url_prefix + slug for slug in _creature_slugs]

# CSV file the scraped monsters are read from and written to.
database = 'monsters_wikidot.csv'

In [3]:
def get_AC(line):
    """Return the first run of decimal digits found in ``line``, as a string.

    Everything before the first digit is skipped. When ``line`` contains no
    digit at all the pattern does not match and an AttributeError is raised.
    """
    armor_match = re.match(r'[^0-9]*([0-9]+)', line)
    (armor_class,) = armor_match.groups()
    return armor_class
def get_HP(line):
    """Return the text of ``line`` from its first digit to the end of that line.

    Leading non-digit characters (including newlines) are dropped. If ``line``
    contains no digit, the result is the empty string.
    """
    hp_match = re.match(r'[^0-9]*(.*)', line)
    (hit_points,) = hp_match.groups()
    return hit_points
def get_stat(row):
    """Format one ability row as ``"<second cell> (<third cell>)"``.

    ``row`` must offer ``find_all("td")``; the text of the cells at index 1
    and 2 is combined, e.g. ``"14 (+2)"``. The first cell is ignored.
    """
    cells = row.find_all("td")
    score = cells[1].text
    modifier = cells[2].text
    return f'{score} ({modifier})'
def get_CR(line):
    """Parse the challenge rating that follows "CR" in ``line`` into a float.

    Both whole ratings ("CR 2") and fractional ones ("CR 1/8") are accepted;
    a fraction is returned as its decimal value (0.125).

    Raises:
        AttributeError: ``line`` has no "CR" followed by digits or "/".
        ValueError: the captured rating is neither a number nor a fraction.
        ZeroDivisionError: the fraction has a zero denominator.
    """
    CRstr = re.match(r'.*CR\s*([0-9/]+)', line).group(1)
    # The quantifier belongs inside each group: with `([0-9])+` only the last
    # digit of the numerator/denominator was captured, so "10/4" became 0/4.
    match_div = re.match(r'([0-9]+)/([0-9]+)', CRstr)
    if match_div:
        return float(match_div.group(1)) / float(match_div.group(2))
    return float(CRstr)
def abbreviate(string):
    """Condense a block of stat-block text into one comma-separated line.

    The substitutions are applied in the listed order:
    the "Skills ", "Senses " and "Languages " labels are dropped,
    "Challenge" becomes "CR", em dashes are removed, doubled newlines are
    collapsed, " ft." becomes "'", and every newline (as well as any literal
    "<br>") ends up as ", ".

    NOTE(review): this function used to finish with
    ``re.sub(r'Passive Perception [0-9]*<br>', "", ...)``. That call could
    never match because all "<br>" markers have already been turned into
    ", " by then, so it was removed. Output is unchanged ("Passive
    Perception ..." is kept); decide separately whether it should be
    stripped.
    """
    # Order matters: newlines go through an intermediate "<br>" marker that
    # is converted to ", " last, after the " ft." rewrite.
    replacements = (
        ("Skills ", ""),
        ("Senses ", ""),
        ("Languages ", ""),
        ("Challenge", "CR"),
        ("—", ""),
        ("\n\n", "\n"),
        ("\n", "<br>"),
        (" ft.", "'"),
        ("<br>", ", "),
    )
    s2 = string
    for old, new in replacements:
        s2 = s2.replace(old, new)
    return s2

def _fetch_rendered_html(url):
    """Open ``url`` in a splinter-driven Firefox and return the page's HTML."""
    browser = splinter.Browser("firefox")
    try:
        browser.visit(url)
        return browser.html
    finally:
        # Close the browser even when the visit fails, so a broken URL does
        # not leave a browser session open.
        browser.quit()


def get_monster(url):
    """Scrape one creature page and return its stat block as a flat dict.

    The page is rendered in Firefox, parsed with BeautifulSoup, and reduced to
    these keys: ``url``, ``name``, ``img``, ``meta``, ``AC``, ``HP``,
    ``movement``, ``fly``, ``swim``, ``str``, ``dex``, ``con``, ``int``,
    ``wis``, ``cha``, ``tidbits``, ``CR``, ``attributes`` and ``source``.

    Args:
        url: address of the creature page.

    Returns:
        dict mapping the keys above to strings, except ``fly``/``swim``
        (bool) and ``CR`` (float).

    Raises:
        AttributeError: an expected element (main content, title, stats
            table, paragraph after the table) is missing from the page.
        KeyError: no line mentioning "ft." or "feet" was found before the
            ability table, so ``movement`` was never set.
        IndexError: the stats table has fewer than six ability rows.
    """
    monster = {'url': url}
    soup = BeautifulSoup(_fetch_rendered_html(url), 'html.parser')

    main_block = soup.find("div", id="main-content")
    monster['name'] = main_block.find("div", id="page-title").text.strip()
    stats_table = main_block.find("table")
    page_block = main_block.find("div", id="page-content")

    img = page_block.find("img", class_="image")
    monster['img'] = "" if img is None else img["src"]

    # Walk the page text line by line up to the ability table. The first
    # non-empty line is kept as the meta description; AC, HP and movement are
    # recognised by their labels.
    for line in page_block.text.split("\n"):
        line = line.strip()
        if not line:
            continue
        if 'meta' not in monster:
            monster['meta'] = line
        if re.match(r'.*Armor\s+Class', line):
            monster['AC'] = get_AC(line)
        elif re.match(r'.*Hit\s+Points', line):
            monster['HP'] = get_HP(line)
        elif re.match(r'.*ft\.', line) or re.match(r'.*feet', line):
            monster['movement'] = line
        elif re.match(r'.*Ability', line):
            break

    monster['fly'] = re.match(r'.*fly', monster['movement']) is not None
    monster['swim'] = re.match(r'.*swim', monster['movement']) is not None

    # Rows 1-6 of the table hold the six abilities; row 0 is skipped.
    rows = stats_table.find_all("tr")
    stats = [get_stat(row) for row in rows[1:7]]
    for position, ability in enumerate(('str', 'dex', 'con', 'int', 'wis', 'cha')):
        monster[ability] = stats[position]

    # The paragraph right after the table carries skills/senses/languages and
    # the challenge rating.
    second_paragraph = stats_table.find_next("p")
    monster['tidbits'] = abbreviate(second_paragraph.text)
    monster['CR'] = get_CR(monster['tidbits'])

    # Every later paragraph becomes one "<br>"-separated attribute entry.
    other_paragraphs = second_paragraph.find_all_next("p")
    monster['attributes'] = "<br>".join(
        abbreviate(paragraph.text) for paragraph in other_paragraphs
    )

    # The source name runs from "Source" up to the next "<" (or end of text).
    source = re.match(r'.*Source:?\s*([^<]*)', monster['attributes'])
    monster['source'] = "" if source is None else source.group(1)
    return monster

In [4]:
# Build the monsters table: reuse rows already stored in the CSV and download
# only the pages that are missing from it.
if os.path.exists(database):
    old_monsters_df = (
        pandas.read_csv(database)
        # A URL stored twice would make .loc[url] return a frame, not a row.
        .drop_duplicates(subset='url', keep='last')
        .set_index('url', drop=False)
    )
else:
    old_monsters_df = pandas.DataFrame()

# url -> record. Seeded up front with every requested monster that is already
# cached, so a checkpoint written mid-run never drops cached rows that the
# loop has not reached yet.
monster_records = {}
if 'url' in old_monsters_df.columns:
    for url in monsters_to_import:
        if url in old_monsters_df.index:
            monster_records[url] = old_monsters_df.loc[url].to_dict()

for url in monsters_to_import:
    if url in monster_records:
        print(f"using old data for {monster_records[url]['name']}")
        continue
    print(f"downloading {url}")
    monster_records[url] = get_monster(url)
    # Checkpoint after each download only (cache hits change nothing), so an
    # interrupted run keeps both the cached rows and the new downloads.
    checkpoint = [monster_records[u] for u in monsters_to_import if u in monster_records]
    pandas.DataFrame(checkpoint).to_csv(database, index=False)

monsters_list = [monster_records[url] for url in monsters_to_import]
monsters_df = pandas.DataFrame(monsters_list)
if monsters_list:
    # Skip the write for an empty import list: an empty CSV could not be
    # read back on the next run.
    monsters_df.to_csv(database, index=False)
monsters_df

using old data for Spider
using old data for Blood Hawk
using old data for Badger
using old data for Bat
using old data for Cat
using old data for Crab
using old data for Deer
using old data for Eagle
using old data for Fox
using old data for Frog
using old data for Goat
using old data for Hare
using old data for Hawk
using old data for Hyena
using old data for Jackal
using old data for Lizard
using old data for Octopus
using old data for Owl
using old data for Pig
using old data for Rat
using old data for Sheep
using old data for Vulture
using old data for Camel
using old data for Dolphin
using old data for Giant Crab
using old data for Giant Rat
using old data for Giant Weasel
using old data for Mastiff
using old data for Mule
using old data for Poisonous Snake
using old data for Pony
using old data for Boar
using old data for Constrictor Snake
using old data for Draft Horse
using old data for Elk
using old data for Giant Badger
using old data for Giant Bat
using old data for Giant L

Unnamed: 0,url,name,img,meta,AC,HP,movement,fly,swim,str,dex,con,int,wis,cha,tidbits,CR,attributes,source
0,http://dndroll.wikidot.com/creatures:spider,Spider,https://media-waterdeep.cursecdn.com/avatars/t...,"Tiny beast, unaligned",12,1 (1d4 - 1),"Speed 20 ft., climb 20 ft.",False,False,2 (-4),14 (+2),8 (-1),1 (-5),10 (+0),2 (-4),"Stealth +4, Darkvision 30', Passive Perception...",0.000,Spider Climb. The spider can climb difficult s...,Monster Manual
1,http://dndroll.wikidot.com/creatures:blood-hawk,Blood Hawk,https://external-content.duckduckgo.com/iu/?u=...,"Small beast, unaligned",12,7 (2d6),"Speed 10 ft., fly 60 ft.",True,False,6 (-2),14 (+2),10 (+0),3 (-4),14 (+2),5 (-3),"Perception +4, Passive Perception 14, CR 1/8 (...",0.125,Keen Sight. The hawk has advantage on Wisdom (...,Monster Manual
2,http://dndroll.wikidot.com/creatures:badger,Badger,https://media-waterdeep.cursecdn.com/avatars/t...,"Tiny beast, unaligned",10,3 (1d4 + 1),"Speed 20 ft., burrow 5 ft.",False,False,4 (-3),11 (+0),12 (+1),2 (-4),12 (+1),5 (-3),"Darkvision 30', Passive Perception 11, CR 0 (1...",0.000,Keen Smell. The badger has advantage on Wisdom...,Monster Manual
3,http://dndroll.wikidot.com/creatures:bat,Bat,https://media-waterdeep.cursecdn.com/avatars/t...,"Tiny beast, unaligned",12,1 (1d4 - 1),"Speed 5 ft., fly 30 ft.",True,False,2 (-4),15 (+2),8 (-1),2 (-4),12 (+1),4 (-3),"Blindsight 60', Passive Perception 11, CR 0 (1...",0.000,Echolocation. The bat can't use its blindsight...,Monster Manual
4,http://dndroll.wikidot.com/creatures:cat,Cat,https://external-content.duckduckgo.com/iu/?u=...,"Tiny beast, unaligned",12,2 (1d4),"Speed 40 ft., climb 30 ft.",False,False,3 (-4),15 (+2),10 (+0),3 (-4),12 (+1),7 (-2),"Perception +3, Stealth +4, Passive Perception ...",0.000,Keen Smell. The cat has advantage on Wisdom (P...,Monster Manual
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,http://dndroll.wikidot.com/creatures:giant-elk,Giant Elk,,"Huge beast, unaligned",14,42 (5d12 + 10),Speed 60 ft.,False,False,19 (+4),16 (+3),14 (+2),7 (-2),14 (+2),10 (+0),"Perception +4, Passive Perception 14, Giant El...",2.000,Charge. If the elk moves at least 20 feet stra...,Monster Manual
66,http://dndroll.wikidot.com/creatures:hunter-shark,Hunter Shark,,"Large beast, unaligned",12,45 (6d10 + 12),"Speed 0 ft., swim 40 ft.",False,True,18 (+4),13 (+1),15 (+2),1 (-5),10 (+0),4 (-3),"Perception +2, Blindsight 30', Passive Percept...",2.000,Blood Frenzy. The shark has advantage on melee...,Monster Manual
67,http://dndroll.wikidot.com/creatures:polar-bear,Polar Bear,,"Large beast, unaligned",12,42 (5d10 + 15),"Speed 40 ft., swim 30 ft.",False,True,20 (+5),10 (+0),16 (+3),2 (-4),13 (+1),7 (-2),"Perception +3, Passive Perception 13, CR 2 (45...",2.000,Keen Smell. The bear has advantage on Wisdom (...,Monster Manual
68,http://dndroll.wikidot.com/creatures:rhinoceros,Rhinoceros,,"Large beast, unaligned",11,45 (6d10 + 12),Speed 40 ft.,False,False,21 (+5),8 (-1),15 (+2),2 (-4),12 (+1),6 (-2),"Passive Perception 11, CR 2 (450 XP)",2.000,Charge. If the rhinoceros moves at least 20 fe...,Monster Manual
