In [341]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from collections import defaultdict

In [26]:
# example of intended df style
changes = pd.DataFrame(columns=('Hero', 'Text', 'num_buffs', 'num_nerfs'))
rows_list = []
rows_list.append({'hero_name': 'Abaddon', 'patch_text': ["Mist Coil cooldown reduced from 5 to 4.5", "Borrowed Time Aghanim's Scepter damage redirection increased from 35% to 50%"], 'num_buffs': 2, 'num_nerfs': 0})
rows_list.append({'hero_name': 'Alchemist', 'patch_text': ["Acid Spray damage increased from 12/16/20/24 to 15/20/25/30", "Acid Spray damage increased from 12/16/20/24 to 15/20/25/30"], 'num_buffs': 2, 'num_nerfs': 0})
pd.DataFrame(rows_list)

Unnamed: 0,hero_name,num_buffs,num_nerfs,patch_text
0,Abaddon,2,0,"[Mist Coil cooldown reduced from 5 to 4.5, Bor..."
1,Alchemist,2,0,[Acid Spray damage increased from 12/16/20/24 ...


In [342]:
# SCRAPE FOR HERO NAME SET

r_heroes = requests.get('http://www.dota2.com/heroes/')
soup_heroes = BeautifulSoup(r_heroes.content, 'html.parser')
hero_name_set = set()
for column in soup_heroes.findAll('div', {'class': 'heroIcons'}):
    for a_tag in column.findAll('a'):
        hero_name_set.add(a_tag['href'].split('/')[-2].replace('_', ' '))

hero_name_set.remove('Natures Prophet')
hero_name_set.add("Nature's Prophet") # dota2.com doesn't include apostrophe

In [362]:
def wiki_query(page, hero_name_set):
    '''
    INPUT: string
    OUTPUT: list of lists of strings
    '''
    r = requests.get('https://dota2.gamepedia.com/{0}'.format(page))
    soup = BeautifulSoup(r.content, 'html.parser')

    hero_change_dict = defaultdict(list)
    for ul in soup.find('div', {'id': 'mw-content-text'}).find('div', recursive=False).findAll('ul', recursive=False):
        try:
            li = ul.find('li')
            if li.find('a')['title'] in hero_name_set:
                for sub_ul in li.findAll('ul', recursive=False):
                    for sub_li in sub_ul.findAll('li', recursive=False):
                        hero_change_dict[li.find('a')['title']].append(sub_li.text.strip())
        except:
            continue

    return hero_change_dict

In [363]:
hero_change_dict = wiki_query('September_24,_2015_Patch', hero_name_set)

In [364]:
hero_change_dict['Chaos Knight']

[u"Chaos Strike now lowers the target's armor by 5 for 8 seconds whenever it crits \n\nApplies before the damage happens, like  Desolator. Can be applied by illusions, but multiple instances do not stack. Only triggers when critical strike",
 u'Chaos Strike critical damage reduced from 150/200/250/300% to 125/175/225/275%']

In [365]:
# BUILD DATA FRAME
# redo this to use dictionary?

heroes = []
change_text = []
for hero, text_list in hero_change_dict.iteritems():
    heroes.append(hero)
    change_text.append(text_list)
df = pd.DataFrame({'hero': heroes, 'text': change_text})

In [366]:
df.loc[20]['text']

[u'Blink distance rescaled from 1000/1075/1150/1150 to 925/1000/1075/1150',
 u'Mana Void stun rescaled from 0.1/0.2/0.3 to 0.15']

In [367]:
changed_heroes_685 = set(df.hero.values)

In [368]:
# FIND HEROES NOT CHANGED IN PATCH

untouched_heroes = []
for hero in hero_name_set:
    if not hero in changed_heroes_685:
        untouched_heroes.append(hero)

In [369]:
# ADD UNCHANGED HEROES TO DF

df_all = pd.concat([df, pd.DataFrame({'hero':untouched_heroes, 'text':[[]]*len(untouched_heroes)})])

In [370]:
df_all.sort('hero', inplace=True)
df_all.reset_index(inplace=True)
df_all.drop('index', axis=1, inplace=True)

In [104]:
# USED FOR MANUAL DATA ENTRY
# df_all.to_csv('hero_table_685.csv')
# df_xl = pd.read_csv('hero_table_685_xl.csv')

In [371]:
df_changes = pd.concat([df_all, df_xl[['num_buffs', 'num_nerfs', 'change_type']]], axis=1)

In [372]:
df_changes['change_type'] = df_changes['change_type'].apply(lambda x: x.replace('[', '').replace(']', '').split(','))
df_changes['change_type'] = df_changes['change_type'].apply(lambda x: [int(num.strip()) for num in x] if len(x[0]) > 0 else [])

In [373]:
df_changes.head(6)

Unnamed: 0,hero,text,num_buffs,num_nerfs,change_type
0,Abaddon,"[Mist Coil cooldown reduced from 5 to 4.5, Bor...",2,0,"[1, 1]"
1,Alchemist,[Acid Spray damage increased from 12/16/20/24 ...,2,0,"[1, 1]"
2,Ancient Apparition,[Cold Feet cast range increased from 700 to 70...,1,0,[1]
3,Anti-Mage,[Blink distance rescaled from 1000/1075/1150/1...,0,2,"[-1, -1]"
4,Arc Warden,[Spark Wraith cooldown reduced from 7 to 4],1,0,[1]
5,Axe,[],0,0,[]


In [374]:
# Pudge and Shadow Shaman have an additional fix listed on wiki
df_changes['change_type'][69] = [1,1,1]
df_changes['change_type'][78] = [1,1,1]

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [375]:
import json
with open('../dota2-database/json/abilities.json') as f:
    abi_json = json.load(f)

In [376]:
ability_set = set(ability['localizedName'] for ability in abi_json)
ability_set.remove('')

In [None]:
import json
with open('../dota2-database/json/heroes.json') as f:
    hero_json = json.load(f)

In [272]:
hero_json_set = set(hero['localizedName'] for hero in hero_json)

In [285]:
hero_name_set - hero_json_set

{u'Arc Warden', u'Oracle', u'Techies', u'Winter Wyvern'}

In [286]:
hero_json_set - hero_name_set

set()

In [377]:
def remove_abi_names(changelist):
    changes_trimmed = []
    for i, item in enumerate(changelist):
        changes_trimmed.append(item)
        for abi in ability_set:
            item = item.replace(abi, '').strip()
            changes_trimmed[i] = item
    return changes_trimmed

In [378]:
df_changes['text_no_abi'] = df_changes['text'].apply(remove_abi_names)

In [379]:
df_changes

Unnamed: 0,hero,text,num_buffs,num_nerfs,change_type,text_no_abi
0,Abaddon,"[Mist Coil cooldown reduced from 5 to 4.5, Bor...",2,0,"[1, 1]","[cooldown reduced from 5 to 4.5, Aghanim's Sce..."
1,Alchemist,[Acid Spray damage increased from 12/16/20/24 ...,2,0,"[1, 1]",[damage increased from 12/16/20/24 to 15/20/25...
2,Ancient Apparition,[Cold Feet cast range increased from 700 to 70...,1,0,[1],[cast range increased from 700 to 700/800/900/...
3,Anti-Mage,[Blink distance rescaled from 1000/1075/1150/1...,0,2,"[-1, -1]",[distance rescaled from 1000/1075/1150/1150 to...
4,Arc Warden,[Spark Wraith cooldown reduced from 7 to 4],1,0,[1],[Spark Wraith cooldown reduced from 7 to 4]
5,Axe,[],0,0,[],[]
6,Bane,"[Enfeeble cooldown reduced from 10 to 8, Brain...",3,0,"[1, 1, 1]","[cooldown reduced from 10 to 8, mana cost redu..."
7,Batrider,[Flamebreak damage over time reduced from 50 p...,2,1,"[-1, 1, 1]",[damage over time reduced from 50 per second t...
8,Beastmaster,[],0,0,[],[]
9,Bloodseeker,[Thirst scaling changed from 100%->25% to 75%-...,0,2,"[-1, -1]","[scaling changed from 100%->25% to 75%->25%, h..."


In [383]:
df_changes.to_csv('patch_685_processed.csv')