In [40]:
from datetime import datetime
import requests
import numpy as np
import pandas as pd
import yaml
import re
from bs4 import BeautifulSoup
import wikitextparser as wtp
from ratelimit import rate_limited
from operator import *

def save_obj(obj, name ):
    import pickle
    with open('obj/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
def load_obj(name ):
    import pickle
    with open('obj/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [6]:
def parseSquad(tourdict):
    '''
    input: title of tournament
    output: dictionary of participants of that tournament
    '''
    json = tourdict['wikitext']
    wiki = json.get('parse').get('wikitext').get('*')
    wiki = re.sub('\n', '', wiki)
    parsed = wtp.parse(wiki)
    templates = parsed.templates
    squad = {}
    for t in templates:
        if str(t)[:11] == '{{TeamCard|':
            for ar in t.arguments:
                if (str(ar)[:5] == '|team'):
                    teamname = re.sub(r'\W+', '', ar.value).lower()
                    squad[teamname] = {}
                for i in range(1, 6):
                    if (str(ar)[:4] == '|p' + str(i) + '='):
                        squad[teamname]['pos' + str(i)] = re.sub(r'\W+', '', ar.value).lower()  
    return squad

In [None]:
def getEloDict(ifd):
    '''
    receive infomation dictionary of tournament and
    return elodict
    '''
    start, end = dateToGlicko(ifd['start'], ifd['end'])
    #rating of startdate from datdota and turn into list of dict
    urldat = 'http://www.datdota.com/api/ratings?date=' + start
    htmldat = requests.get(urldat)
    htmldat.encoding = 'utf-8'
    elo = BeautifulSoup(htmldat.text, "lxml")
    elodict = yaml.load(elo.text)
    startelolist = elodict['data']   
    #rating of enddate from datdota and turn into list of dict
    urldat = 'http://www.datdota.com/api/ratings?date=' + end
    htmldat = requests.get(urldat)
    htmldat.encoding = 'utf-8'
    elo = BeautifulSoup(htmldat.text, "lxml")
    elodict = yaml.load(elo.text)
    endelolist = elodict['data']
    return (startelolist, endelolist)

In [7]:
def getEloDict(tourdict):
    return (tourdict['startelodict'], tourdict['endelodict'])

In [8]:
def compareAndPut(df, squad, ifd, startelodict, endelodict):
    for lqn, pdc in squad.items():
        try:
            teamName = comparedict[lqn]
        except KeyError:
            teamName = lqn
        j = 0
        for srt in startelodict:
            for ert in endelodict:
                if (teamName == re.sub(r'\W+', '', srt['teamName']).lower()) & (teamName == re.sub(r'\W+', '', ert['teamName']).lower()):
                    j += 1
                    for pos, id in pdc.items():
                        df = df.append({
                            'team': teamName,
                            'position': pos,
                            'id': id,
                            'tour': ifd['title'],
                            'prizeusd': ifd['prize'],
                            'start': ifd['start'],
                            'end': ifd['end'],
                            'startelo': srt['elo64']['current'],
                            'startglicko2mu': srt['glicko2']['mu'],
                            'startglicko2phi': srt['glicko2']['phi'],
                            'startglicko2rating': srt['glicko2']['rating'],
                            'endelo': ert['elo64']['current'],
                            'endglicko2mu': ert['glicko2']['mu'],
                            'endglicko2phi': ert['glicko2']['phi'],
                            'endglicko2rating': ert['glicko2']['rating']
                        }, ignore_index=True)
        if j == 0:
            for pos, id in pdc.items():
                if id == '':
                    continue
                else:
                    df = df.append({
                        'team': teamName,
                        'position': pos,
                        'id': id,
                        'tour': ifd['title'],
                        'prizeusd': ifd['prize'],
                        'start': ifd['start'],
                        'end': ifd['end'],
                        'startelo': np.nan,
                        'startglicko2mu': np.nan,
                        'startglicko2phi': np.nan,
                        'startglicko2rating': np.nan,
                        'endelo': np.nan,
                        'endglicko2mu': np.nan,
                        'endglicko2phi': np.nan,
                        'endglicko2rating': np.nan
                    }, ignore_index=True)
        
    return df

In [9]:
alltour = load_obj('/dict/touranddate')
comparedict = load_obj('/dict/lqtodat')

In [10]:
endti2 = '2012-09-02'
endti7 = '2017-08-02'

In [12]:
%%time
errortitle = []
df = pd.DataFrame(columns=['team', 'position', 'id', 'tour', 'prizeusd', 'start', 'end',
                           'startelo', 'startglicko2mu', 'startglicko2phi', 'startglicko2rating', 'endelo', 'endglicko2mu', 'endglicko2phi', 'endglicko2rating'])
for turl, ifd in alltour.items():
    try:
        if (pd.to_datetime(ifd['start']) > pd.to_datetime(endti2)) & (pd.to_datetime(ifd['end']) <= pd.to_datetime(endti7)):
            tourdict = load_obj(re.sub(r'\W+', '', ifd['title']).lower() )
            ptc = parseSquad(tourdict)
            startelodict, endelodict = getEloDict(tourdict)
            df = compareAndPut(df, ptc, ifd, startelodict, endelodict)
    except:
        print(turl)
        errortitle = errortitle.append(turl)

Wall time: 1min 16s


In [19]:
df[df['id'] == 'n0tail']

Unnamed: 0,team,position,id,tour,prizeusd,start,end,startelo,startglicko2mu,startglicko2phi,startglicko2rating,endelo,endglicko2mu,endglicko2phi,endglicko2rating
12061,og,pos1,n0tail,AMD SAPPHIRE Dota PIT League,300000.0,02/11/2017,05/11/2017,1199.142516,1913.541075,46.644558,1796.929681,1127.265194,1913.541075,46.644558,1796.929681
2924,fnatic,pos4,n0tail,ASUS ROG DreamLeague Kick-Off Season,50000.0,04/11/2013,30/11/2013,1342.302135,1781.449596,37.472799,1687.767599,1241.244534,1786.215078,35.18948,1698.241378
1804,fnatic,pos4,n0tail,Corsair Gaming Summer Tournament 2013,10000.0,28/06/2013,26/07/2013,1220.024244,1726.254558,36.074633,1636.067976,1209.129289,1748.109445,35.234209,1660.023922
11446,og,pos1,n0tail,Dota 2 Asia Championships 2017,611997.0,27/03/2017,04/04/2017,1367.850733,2050.008289,44.6812,1938.30529,1328.541635,2033.681993,43.742622,1924.325437
3594,fnatic,pos4,n0tail,Dota 2 League Season 4,50000.0,07/01/2014,08/01/2014,1261.007534,1824.588064,35.269572,1736.414134,1218.051864,1824.588064,35.269572,1736.414134
11236,og,pos1,n0tail,Dota Pit League Season 5,139860.0,20/01/2017,22/01/2017,1340.040716,1994.239917,43.230624,1886.163358,1278.853121,1994.239917,43.230624,1886.163358
4594,fnatic,pos4,n0tail,DreamHack Bucharest Invitational,25000.0,26/04/2014,27/04/2014,1065.384906,1811.23233,33.425842,1727.667725,1064.936553,1811.23233,33.425842,1727.667725
947,fnaticeu,pos4,n0tail,DreamHack Dota 2 Invitational,6000.0,14/03/2013,18/04/2013,,,,,,,,
1604,fnatic,pos4,n0tail,DreamHack Summer 2013,46422.0,15/06/2013,17/06/2013,1136.699223,1710.106458,37.903142,1615.348603,1177.302091,1709.478184,37.940101,1614.627932
278,fnatic,pos4,n0tail,DreamHack Winter 2012,30198.0,22/11/2012,25/11/2012,,,,,,,,


In [17]:
df.groupby('tour').size().sort_values(ascending=False)

tour
Fragbite Masters 2013                              160
The Defense Season 3                               160
Dota2 ACE - Provisional                            135
Dota2 Professional League Season 1                 125
WePlay Dota2 League Season 2                       100
The Defense Season 4                               100
Dota 2 Asia Championships 2015                     100
Shanghai Dota 2 Open #2                            100
The International 2014                              95
The Kiev Major 2017                                 95
VPGame Pro League Season 2                          95
The International 2016                              90
The Defense Season 5                                90
The International 2015                              90
VIP LAN CENTER                                      90
G-League 2016                                       90
StarLadder StarSeries Season 8                      90
Russian e-Sports Cup 2017                           85
Dota 

In [20]:
df.dropna()[['team', ['id'], ['tour'], ['startglicko2']]]

Unnamed: 0,team,position,id,tour,prizeusd,start,end,startelo,startglicko2mu,startglicko2phi,startglicko2rating,endelo,endglicko2mu,endglicko2phi,endglicko2rating
30,evilgeniuses,pos1,fear,RaidCall Dota 2 League Season 1,10000,10/09/2012,18/11/2012,1127.178191,1739.804399,58.251412,1594.175869,1102.305540,1757.679651,45.318269,1644.383978
31,evilgeniuses,pos2,jeyo,RaidCall Dota 2 League Season 1,10000,10/09/2012,18/11/2012,1127.178191,1739.804399,58.251412,1594.175869,1102.305540,1757.679651,45.318269,1644.383978
32,evilgeniuses,pos3,demon,RaidCall Dota 2 League Season 1,10000,10/09/2012,18/11/2012,1127.178191,1739.804399,58.251412,1594.175869,1102.305540,1757.679651,45.318269,1644.383978
33,evilgeniuses,pos4,maelk,RaidCall Dota 2 League Season 1,10000,10/09/2012,18/11/2012,1127.178191,1739.804399,58.251412,1594.175869,1102.305540,1757.679651,45.318269,1644.383978
34,evilgeniuses,pos5,bdiz,RaidCall Dota 2 League Season 1,10000,10/09/2012,18/11/2012,1127.178191,1739.804399,58.251412,1594.175869,1102.305540,1757.679651,45.318269,1644.383978
60,natusvincere,pos1,xboct,StarLadder StarSeries Season 3,15000,17/09/2012,21/10/2012,1173.297392,1728.383964,59.512633,1579.602380,1278.923707,1767.597560,51.127330,1639.779235
61,natusvincere,pos2,dendi,StarLadder StarSeries Season 3,15000,17/09/2012,21/10/2012,1173.297392,1728.383964,59.512633,1579.602380,1278.923707,1767.597560,51.127330,1639.779235
62,natusvincere,pos3,lightofheaven,StarLadder StarSeries Season 3,15000,17/09/2012,21/10/2012,1173.297392,1728.383964,59.512633,1579.602380,1278.923707,1767.597560,51.127330,1639.779235
63,natusvincere,pos4,puppey,StarLadder StarSeries Season 3,15000,17/09/2012,21/10/2012,1173.297392,1728.383964,59.512633,1579.602380,1278.923707,1767.597560,51.127330,1639.779235
64,natusvincere,pos5,arsart,StarLadder StarSeries Season 3,15000,17/09/2012,21/10/2012,1173.297392,1728.383964,59.512633,1579.602380,1278.923707,1767.597560,51.127330,1639.779235


In [24]:
df.dropna().groupby(['tour', 'team']).first()[['startglicko2rating', 'endglicko2rating']]

Unnamed: 0_level_0,Unnamed: 1_level_0,startglicko2rating,endglicko2rating
tour,team,Unnamed: 2_level_1,Unnamed: 3_level_1
2013 WPC ACE Dota 2 League,invictusgaming,1688.688466,1759.522708
2013 WPC ACE Dota 2 League,lgdgaming,1685.328918,1734.797461
2013 WPC ACE Dota 2 League,vicigaming,1571.859442,1755.023341
2015 Red Bull Battle Grounds: Dota 2,invictusgaming,1819.971339,1828.249730
2015 Red Bull Battle Grounds: Dota 2,teammalaysia,1737.209517,1762.298425
2015 Red Bull Battle Grounds: Dota 2,teamsecret,1885.938285,1883.468335
AMD Premier League Season 1,zenith,1627.294497,1629.948912
AMD Premier League Season 2,zenith,1631.767538,1611.700957
AMD SAPPHIRE Dota PIT League,fnatic,1672.597279,1672.597279
AMD SAPPHIRE Dota PIT League,newbee,1874.846760,1874.846760


In [27]:
gl = df.dropna().groupby(['tour', 'team']).first()[['start', 'startglicko2rating','end', 'endglicko2rating']]

In [28]:
gl[gl['startglicko2rating'] == gl['endglicko2rating']]

Unnamed: 0_level_0,Unnamed: 1_level_0,start,startglicko2rating,end,endglicko2rating
tour,team,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AMD SAPPHIRE Dota PIT League,fnatic,02/11/2017,1672.597279,05/11/2017,1672.597279
AMD SAPPHIRE Dota PIT League,newbee,02/11/2017,1874.846760,05/11/2017,1874.846760
AMD SAPPHIRE Dota PIT League,og,02/11/2017,1796.929681,05/11/2017,1796.929681
AMD SAPPHIRE Dota PIT League,teamliquid,02/11/2017,1986.916514,05/11/2017,1986.916514
AMD SAPPHIRE Dota PIT League,vicigaming,02/11/2017,1862.509684,05/11/2017,1862.509684
AMD SAPPHIRE Dota PIT League,virtuspro,02/11/2017,1923.377196,05/11/2017,1923.377196
ASUS Open 2012 Finals,natusvincere,13/12/2012,1644.796835,16/12/2012,1644.796835
ASUS Open 2012 Finals,teamempire,13/12/2012,1660.709947,16/12/2012,1660.709947
ASUS Open 2012 Finals,virtuspro,13/12/2012,1527.051333,16/12/2012,1527.051333
ASUS ROG DreamLeague Season 6,alliance,25/11/2016,1708.839752,26/11/2016,1708.839752


In [13]:
tourdict = load_obj('dota2leagueseason4')

In [14]:
tourdict['startelodict'][0]['glickoRatingDate'] == tourdict['endelodict'][0]['glickoRatingDate']

True

In [15]:
tourdict['endelodict'][0]['glickoRatingDate']

1388959200000

In [108]:
def dateToGlicko(startdate, enddate):
    stepo = (pd.to_datetime(startdate, format='%d/%m/%Y') - pd.to_datetime('1970-1-1')).total_seconds() * 1000
    enepo = (pd.to_datetime(enddate, format='%d/%m/%Y') - pd.to_datetime('1970-1-1')).total_seconds() * 1000
    glmo = 338400000
    week = 604800000
    two_hour = 7200000
    stgl = pd.to_datetime(stepo - (stepo % week) + (glmo) + two_hour, unit='ms', origin='unix')
    engl = pd.to_datetime(enepo - (enepo % week) + (glmo) + two_hour, unit='ms', origin='unix') if stgl != pd.to_datetime(enepo - (enepo % week) + (glmo) + two_hour, unit='ms', origin='unix') else pd.to_datetime(enepo - (enepo % week) + (glmo) + two_hour + week, unit='ms', origin='unix')                  
    return re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3-\2-\1', str(stgl)[:10]), re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3-\2-\1', str(engl)[:10])    

In [109]:
dateToGlicko('24/05/2017', '31/05/2017')

('22-05-2017', '29-05-2017')

In [94]:
startDateToGlicko('30/05/2017')

'29-05-2017'

In [96]:
endDateToGlicko('31/05/2017')

'29-05-2017'

In [None]:
epo = (pd.to_datetime(startdate, format='%d/%m/%Y') - pd.to_datetime('1970-1-1')).total_seconds() * 1000
glmo = 338400000
week = 604800000
two_hour = 7200000
return re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3-\2-\1', str(pd.to_datetime(epo - (epo % week) + (glmo) + two_hour, unit='ms', origin='unix'))[:10])
