In [1]:
from datetime import datetime
import requests
import numpy as np
import pandas as pd
import yaml
import re
from bs4 import BeautifulSoup
import wikitextparser as wtp
from ratelimit import rate_limited
from operator import *

def save_obj(obj, name ):
    import pickle
    with open('obj/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
def load_obj(name ):
    import pickle
    with open('obj/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [2]:
@rate_limited(1, 30)
def parseSquad(title):
    '''
    input: title of tournament
    output: dictionary of participants of that tournament
    '''
    headers = {
    'User-Agent': 'Data for Research',
    'From': 'terthasarit@live.com', 
    'Accept-Encoding': 'gzip'
    }
    titleurl = re.sub(' ', '%20', title)
    r = requests.get(url='http://liquipedia.net/dota2/api.php?action=parse&format=json&page=' + titleurl + '&prop=wikitext%7Ctext', headers=headers)
    json = r.json()
    wiki = json.get('parse').get('wikitext').get('*')
    wiki = re.sub('\n', '', wiki)
    parsed = wtp.parse(wiki)
    templates = parsed.templates
    squad = {}
    for t in templates:
        if str(t)[:11] == '{{TeamCard|':
            for ar in t.arguments:
                if (str(ar)[:5] == '|team'):
                    teamname = re.sub(r'\W+', '', ar.value).lower()
                    squad[teamname] = {}
                for i in range(1, 6):
                    if (str(ar)[:4] == '|p' + str(i) + '='):
                        squad[teamname]['pos' + str(i)] = re.sub(r'\W+', '', ar.value).lower()  
    return squad, json

In [3]:
def getEloDict(ifd):
    '''
    receive infomation dictionary of tournament and
    return elodict
    '''
    start = re.sub('/', '-', ifd['start'])
    end = re.sub('/', '-', ifd['end'])
    #rating of startdate from datdota and turn into list of dict
    urldat = 'http://www.datdota.com/api/ratings?date=' + start
    htmldat = requests.get(urldat)
    htmldat.encoding = 'utf-8'
    elo = BeautifulSoup(htmldat.text, "lxml")
    elodict = yaml.load(elo.text)
    startelolist = elodict['data']   
    #rating of enddate from datdota and turn into list of dict
    urldat = 'http://www.datdota.com/api/ratings?date=' + end
    htmldat = requests.get(urldat)
    htmldat.encoding = 'utf-8'
    elo = BeautifulSoup(htmldat.text, "lxml")
    elodict = yaml.load(elo.text)
    endelolist = elodict['data']
    return (startelolist, endelolist)

In [4]:
def compareAndPut(df, squad, ifd, startelodict, endelodict):
    for lqn, pdc in squad.items():
        try:
            teamName = comparedict[lqn]
        except KeyError:
            teamName = lqn
        j = 0
        for srt in startelodict:
            for ert in endelodict:
                if (teamName == re.sub(r'\W+', '', srt['teamName']).lower()) & (teamName == re.sub(r'\W+', '', ert['teamName']).lower()):
                    j += 1
                    for pos, id in pdc.items():
                        df = df.append({
                            'team': teamName,
                            'position': pos,
                            'id': id,
                            'tour': ifd['title'],
                            'prizeusd': ifd['prize'],
                            'start': ifd['start'],
                            'end': ifd['end'],
                            'startelo': srt['elo64']['current'],
                            'startglicko2mu': srt['glicko2']['mu'],
                            'startglicko2phi': srt['glicko2']['phi'],
                            'startglicko2rating': srt['glicko2']['rating'],
                            'endelo': ert['elo64']['current'],
                            'endglicko2mu': ert['glicko2']['mu'],
                            'endglicko2phi': ert['glicko2']['phi'],
                            'endglicko2rating': ert['glicko2']['rating']
                        }, ignore_index=True)
        if j == 0:
            for pos, id in pdc.items():
                if id == '':
                    continue
                else:
                    df = df.append({
                        'team': teamName,
                        'position': pos,
                        'id': id,
                        'tour': ifd['title'],
                        'prizeusd': ifd['prize'],
                        'start': ifd['start'],
                        'end': ifd['end'],
                        'startelo': np.nan,
                        'startglicko2mu': np.nan,
                        'startglicko2phi': np.nan,
                        'startglicko2rating': np.nan,
                        'endelo': np.nan,
                        'endglicko2mu': np.nan,
                        'endglicko2phi': np.nan,
                        'endglicko2rating': np.nan
                    }, ignore_index=True)
        
    return df

In [5]:
alltour = load_obj('/dict/touranddate')
comparedict = load_obj('/dict/lqtodat')

In [6]:
endti2 = '2012-09-02'
endti7 = '2017-08-02'

In [17]:
%%time
errortitle = []
df = pd.DataFrame(columns=['team', 'position', 'id', 'tour', 'prizeusd', 'start', 'end',
                           'startelo', 'startglicko2mu', 'startglicko2phi', 'startglicko2rating', 'endelo', 'endglicko2mu', 'endglicko2phi', 'endglicko2rating'])
for turl, ifd in tourleft.items():
    try:
        if (pd.to_datetime(ifd['start']) > pd.to_datetime(endti2)) & (pd.to_datetime(ifd['end']) <= pd.to_datetime(endti7)):
            ptc, lqjson = parseSquad(turl)
            startelodict, endelodict = getEloDict(ifd)
            save_obj({'wikitext': lqjson, 'startelodict': startelodict, 'endelodict': endelodict}, re.sub(r'\W+', '', ifd['title']).lower() )
            df = compareAndPut(df, ptc, ifd, startelodict, endelodict)
    except:
        print(turl)
        errortitle = errortitle.append(turl)

Wall time: 34min 35s


In [9]:
turl

'Dota 2 Radiant & Dire Cup/2015'

In [15]:
tourleft = alltour.copy()
for k, v in alltour.items():
    del tourleft[k]
    if k == 'Shanghai Dota 2 Open/1':
        break

In [27]:
for k, v in alltour.items():
    print(k)

The International/2011
Electronic Sports World Cup 2011
The Defense/Season 1
DreamHack/2011/Winter
Dota2 Star Championship
Malaysia Invitational
The Premier League/Season 1
Dota2Replays Brawl
Razer Dota 2 Tournament
StarLadder/StarSeries/Season 1
ProDOTA2 League/Season 1/Non Pro
ProDOTA2 League/Season 1/Pro
The Premier League/Season 2
Gigabyte Dota 2 Masters
GosuLeague/1
StarLadder/StarSeries/Season 2
GosuLeague/2
DreamHack/2012/Summer
The Defense/Season 2
GosuLeague/3
Alienware 2012 Battlegrounds
Samsung European Encounter
The International/2012
WCG Asia 2012
RaidCall Dota 2 League Season 1
StarLadder/StarSeries/Season 3
GosuLeague/4
GosuLeague/4/Division 1
The Premier League/Season 3
G-1 Champions League Season 4
Electronic Sports World Cup 2012
GosuLeague/5
GosuLeague/5/Division 1
StarLadder/StarSeries/Season 4
DreamHack/2012/Winter
The Defense/Season 3
World Cyber Games 2012
Thor Open 2012
RaidCall Dota 2 League Season 2
ASUS Open 2012
The Asia 2012
G-League 2012 Season 2
The Premi

In [20]:
alltour['Shanghai Dota 2 Open/1']

{'end': '03/01/2016',
 'prize': 61472,
 'start': '01/01/2016',
 'title': 'Shanghai Dota 2 Open #1'}

In [16]:
tourleft

{'Adrenaline Cyber League/2017': {'end': '22/11/2017',
  'prize': 100000,
  'start': '21/11/2017',
  'title': 'Adrenaline Cyber League'},
 'Boston Major/2016': {'end': '10/12/2016',
  'prize': 3000000,
  'start': '03/12/2016',
  'title': 'The Boston Major 2016'},
 'Captains Draft/3': {'end': '17/02/2016',
  'prize': 124154,
  'start': '19/01/2016',
  'title': 'Captains Draft 3.0'},
 'Captains Draft/4': {'end': '07/01/2018',
  'prize': 300000,
  'start': '04/01/2018',
  'title': 'Captains Draft 4.0'},
 'China Top/2016': {'end': '18/12/2016',
  'prize': 229904,
  'start': '17/12/2016',
  'title': 'China Top 2016'},
 'China Top/2017': {'end': '23/11/2017',
  'prize': 'Nan',
  'start': '19/11/2017',
  'title': 'China Top 2017'},
 'Dota 2 Asia Championships/2017': {'end': '04/04/2017',
  'prize': 611997,
  'start': '27/03/2017',
  'title': 'Dota 2 Asia Championships 2017'},
 'Dota 2 Asia Championships/2018': {'end': '07/04/2018',
  'prize': 1000000,
  'start': '30/03/2018',
  'title': 'Dota

In [26]:
def correction(urllist):
    for turl in urllist:
        ifd = alltour[turl]
        ptc, lqjson = parseSquad(turl)
        startelodict, endelodict = getEloDict(ifd)
        save_obj({'wikitext': lqjson, 'startelodict': startelodict, 'endelodict': endelodict}, re.sub(r'\W+', '', ifd['title']).lower() )  

In [49]:
turl = 'Dota 2 Radiant %26 Dire Cup/2015'
ifd = alltour['Dota 2 Radiant & Dire Cup/2015']
ptc, lqjson = parseSquad(turl)
startelodict, endelodict = getEloDict(ifd)
save_obj({'wikitext': lqjson, 'startelodict': startelodict, 'endelodict': endelodict}, re.sub(r'\W+', '', ifd['title']).lower() )  

In [47]:
urllist = ['Dota 2 Radiant %26 Dire Cup/2015']

In [48]:
correction(urllist)

KeyError: 'Dota 2 Radiant %26 Dire Cup/2015'

In [30]:
len(alltour)

304

In [39]:
i = 0

In [42]:
for turl, ifd in alltour.items():
    if (pd.to_datetime(ifd['start']) > pd.to_datetime(endti2)) & (pd.to_datetime(ifd['end']) <= pd.to_datetime(endti7)):
        print(turl)

StarLadder/StarSeries/Season 1
RaidCall Dota 2 League Season 1
StarLadder/StarSeries/Season 3
G-1 Champions League Season 4
StarLadder/StarSeries/Season 4
DreamHack/2012/Winter
The Defense/Season 3
World Cyber Games 2012
RaidCall Dota 2 League Season 2
ASUS Open 2012
The Asia 2012
G-League 2012 Season 2
The Premier League/Season 4
StarLadder/StarSeries/Season 5
Armaggeddon Dota 2 Grand Slam Asia 2013
WePlay Dota2 League/Season 1
DreamHack/2013/Invitational
G-1 Champions League Season 5
ESL Major Series One/Spring/Main Event
Ritmix Russian DOTA 2 League
Techlabs Cup/2013/Season 1
Curse Dota 2 Invitational
AMD Premier League/Season 1
Dota 2 Super League
Rapture Gaming Network League 2013/2014
StarLadder/StarSeries/Season 6
The Premier League/Season 5
RaidCall Dota 2 League Season 3
The Defense/Season 4
American Dota League/Season 1
DreamHack/2013/Summer
Alienware Cup
ESL Major Series One/Summer/Main Event
Corsair Gaming Summer Dota 2 Tournament 2013
Nexon Starter League
E2MAX L33t Champi

In [41]:
i

248