In [1]:
from datetime import datetime
import requests
import numpy as np
import pandas as pd
import yaml
import re
from bs4 import BeautifulSoup
import wikitextparser as wtp
from ratelimit import rate_limited
from operator import *

def save_obj(obj, name):
    '''
    Pickle ``obj`` to ``obj/<name>.pkl`` relative to the working directory.

    input: obj  - any picklable object
           name - file stem below ``obj/`` without the '.pkl' extension;
                  may contain sub-directories (e.g. 'old/sometournament')
    output: None; the file is created or overwritten on disk

    Missing parent directories are created first, so saving no longer fails
    with FileNotFoundError when ``obj/`` (or a sub-folder) does not exist yet.
    '''
    import os
    import pickle

    # Plain concatenation is kept on purpose: names in this notebook may carry
    # a leading slash (e.g. '/dict/touranddate'), and os.path.join would treat
    # such a name as absolute and silently drop the 'obj/' prefix.
    path = 'obj/' + name + '.pkl'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
def load_obj(name):
    '''
    Read back the object stored at ``obj/<name>.pkl``.

    input: name - file stem below ``obj/`` without the '.pkl' extension
    output: the unpickled Python object
    '''
    import pickle

    # NOTE: unpickling can execute arbitrary code; only point this at files
    # that were written by this notebook.
    path = ''.join(('obj/', name, '.pkl'))
    with open(path, 'rb') as source:
        restored = pickle.load(source)
    return restored

In [2]:
def dateToGlicko(startdate, enddate):
    '''
    Snap a start/end date pair onto a weekly grid.

    input: startdate, enddate - strings formatted as 'dd/mm/YYYY'
    output: tuple (start, end) of strings formatted as 'dd-mm-YYYY'

    Each date is floored to its 7-day bucket counted from the Unix epoch and
    then shifted by 338400000 ms + 7200000 ms (exactly four days, which is a
    Monday because the epoch fell on a Thursday). When both dates land on the
    same grid point the end is pushed one week later, so the two results
    always differ.
    '''
    week = 604800000
    glmo = 338400000
    two_hour = 7200000
    epoch = pd.to_datetime('1970-1-1')

    def to_ms(text):
        # Milliseconds between the Unix epoch and the parsed date.
        parsed = pd.to_datetime(text, format='%d/%m/%Y')
        return (parsed - epoch).total_seconds() * 1000

    def grid_point(ms, extra=0):
        # Floor to the weekly bucket, then apply the fixed offset (+ extra).
        return pd.to_datetime(ms - (ms % week) + (glmo) + two_hour + extra, unit='ms', origin='unix')

    def day_first(stamp):
        # 'YYYY-MM-DD hh:mm:ss' -> 'DD-MM-YYYY'
        return re.sub(r'(\d{4})-(\d{2})-(\d{2})', r'\3-\2-\1', str(stamp)[:10])

    start_ms = to_ms(startdate)
    end_ms = to_ms(enddate)

    stgl = grid_point(start_ms)
    engl = grid_point(end_ms)
    if stgl == engl:
        # Same grid point for both dates: move the end one week forward.
        engl = grid_point(end_ms, week)

    return day_first(stgl), day_first(engl)

In [3]:
def parseSquad(tourdict):
    '''
    Extract the team rosters from a stored tournament page.

    input: tourdict - dict whose 'wikitext' entry holds the parse response;
           the page source is read from ['parse']['wikitext']['*']
    output: tuple (squad, json)
            squad - {team name: {'pos1'..'pos5': player id}}
            json  - the untouched tourdict['wikitext'] payload

    Only ``{{TeamCard|...}}`` templates are inspected. Team names and player
    ids are normalised by removing every non-word character and lower-casing.
    '''
    json = tourdict['wikitext']
    wiki = json.get('parse').get('wikitext').get('*')
    # Newlines are removed so a card starts with the literal '{{TeamCard|'
    # prefix and its arguments start with '|team' / '|pN=' as tested below.
    wiki = wiki.replace('\n', '')

    # Argument prefix -> roster key, for the five player slots.
    player_slots = {'|p' + str(i) + '=': 'pos' + str(i) for i in range(1, 6)}

    squad = {}
    for t in wtp.parse(wiki).templates:
        if not str(t).startswith('{{TeamCard|'):
            continue
        # Reset for every card. Previously the name survived from the card
        # before, so a player argument that preceded the card's own '|team'
        # argument was filed under the wrong team (or raised NameError on
        # the very first card).
        teamname = None
        for ar in t.arguments:
            raw = str(ar)
            if raw.startswith('|team'):
                teamname = re.sub(r'\W+', '', ar.value).lower()
                squad[teamname] = {}
                continue
            slot = player_slots.get(raw[:4])
            if slot is not None and teamname is not None:
                squad[teamname][slot] = re.sub(r'\W+', '', ar.value).lower()
    return squad, json

In [4]:
def _fetchRatings(date, timeout=30):
    '''
    Download the datdota ratings for ``date`` and return the response's
    'data' entry.

    input: date - date string appended verbatim to the query URL
           timeout - seconds handed to requests.get so a stalled server
                     cannot block the notebook forever
    output: whatever is stored under 'data' in the parsed response
    '''
    urldat = 'http://www.datdota.com/api/ratings?date=' + date
    htmldat = requests.get(urldat, timeout=timeout)
    htmldat.encoding = 'utf-8'
    elo = BeautifulSoup(htmldat.text, "lxml")
    # yaml.load without an explicit Loader is deprecated since PyYAML 5.1 and
    # is a TypeError from PyYAML 6 on. safe_load only builds plain Python
    # types, which also avoids constructing arbitrary objects from a remote
    # payload.
    elodict = yaml.safe_load(elo.text)
    return elodict['data']


def getEloDict(ifd):
    '''
    Fetch the team ratings at the start and at the end of a tournament.

    input: ifd - information dictionary of a tournament; its 'start' and
           'end' entries are converted with dateToGlicko
    output: tuple (startelolist, endelolist) - the 'data' entries of the two
            rating responses, in that order
    '''
    start, end = dateToGlicko(ifd['start'], ifd['end'])
    startelolist = _fetchRatings(start)
    endelolist = _fetchRatings(end)
    return (startelolist, endelolist)

In [5]:
def compareAndPut(df, squad, ifd, startelodict, endelodict, name_map=None):
    '''
    Append one row per (team, position) of a tournament to ``df``.

    input: df           - DataFrame collecting the rows
           squad        - {team name: {position: player id}}
           ifd          - tournament info with 'title', 'prize', 'start', 'end'
           startelodict - rating entries at the start; each entry provides
                          'teamName', ['elo64']['current'] and
                          ['glicko2']['mu' / 'phi' / 'rating']
           endelodict   - rating entries at the end, same layout
           name_map     - optional {squad team name: rating team name};
                          defaults to the notebook-level ``comparedict``
    output: a DataFrame with the new rows added (``df`` itself is returned
            untouched when there is nothing to add)

    A team is matched when its (mapped) name equals the normalised 'teamName'
    of an entry in both rating lists; every matching start/end pair yields
    one row per roster position. Teams without a match get NaN ratings.

    Rows are gathered in a list and concatenated once: DataFrame.append was
    removed in pandas 2.0 and copied the whole frame on every call.
    '''
    if not squad:
        return df
    if name_map is None:
        name_map = comparedict

    def index_by_team(entries):
        # Normalise each rating entry's name once instead of once per pair.
        grouped = {}
        for entry in entries:
            key = re.sub(r'\W+', '', entry['teamName']).lower()
            grouped.setdefault(key, []).append(entry)
        return grouped

    def build_row(team, pos, player_id, srt, ert):
        return {
            'team': team,
            'position': pos,
            'id': player_id,
            'tour': ifd['title'],
            'prizeusd': ifd['prize'],
            'start': ifd['start'],
            'end': ifd['end'],
            'startelo': srt['elo64']['current'],
            'startglicko2mu': srt['glicko2']['mu'],
            'startglicko2phi': srt['glicko2']['phi'],
            'startglicko2rating': srt['glicko2']['rating'],
            'endelo': ert['elo64']['current'],
            'endglicko2mu': ert['glicko2']['mu'],
            'endglicko2phi': ert['glicko2']['phi'],
            'endglicko2rating': ert['glicko2']['rating']
        }

    # Stand-in rating entry for teams that are absent from the rating lists.
    unrated = {
        'elo64': {'current': np.nan},
        'glicko2': {'mu': np.nan, 'phi': np.nan, 'rating': np.nan}
    }

    start_by_team = index_by_team(startelodict)
    end_by_team = index_by_team(endelodict)

    records = []
    for lqn, pdc in squad.items():
        try:
            teamName = name_map[lqn]
        except KeyError:
            teamName = lqn

        matched = False
        for srt in start_by_team.get(teamName, []):
            for ert in end_by_team.get(teamName, []):
                matched = True
                # NOTE(review): blank player ids are kept here but skipped in
                # the unmatched branch below; behaviour preserved - confirm
                # whether that asymmetry is intended.
                for pos, player_id in pdc.items():
                    records.append(build_row(teamName, pos, player_id, srt, ert))

        if not matched:
            for pos, player_id in pdc.items():
                if player_id == '':
                    continue
                records.append(build_row(teamName, pos, player_id, unrated, unrated))

    if not records:
        return df
    return pd.concat([df, pd.DataFrame(records)], ignore_index=True, sort=False)

In [6]:
# Tournament table: each value is used further down as a dict with 'title',
# 'start', 'end' and 'prize' entries. Read from obj//dict/touranddate.pkl.
alltour = load_obj('/dict/touranddate')
# Lookup from a parsed squad team name to the normalised team name used in
# the rating data (consulted by compareAndPut). Read from obj//dict/lqtodat.pkl.
comparedict = load_obj('/dict/lqtodat')

In [7]:
# Date window for the tournaments processed below: a tournament qualifies when
# it starts strictly after endti2 and ends on or before endti7.
# NOTE(review): the names suggest the end dates of The International 2 and 7 - confirm.
endti2 = '2012-09-02'
endti7 = '2017-08-12'

In [8]:
%%time
errortitle = []
df = pd.DataFrame(columns=['team', 'position', 'id', 'tour', 'prizeusd', 'start', 'end',
                           'startelo', 'startglicko2mu', 'startglicko2phi', 'startglicko2rating', 'endelo', 'endglicko2mu', 'endglicko2phi', 'endglicko2rating'])
for turl, ifd in alltour.items():
    try:
        if (pd.to_datetime(ifd['start'], format='%d/%m/%Y') > pd.to_datetime(endti2)) & (pd.to_datetime(ifd['end'], format='%d/%m/%Y') <= pd.to_datetime(endti7)):
            try:
                tourdict = load_obj('old/' + re.sub(r'\W+', '', ifd['title']).lower() )
                print ('Succesfully Loaded: ' + ifd['title'])
            except FileNotFoundError:
                print ('already glickod: ' + ifd['title'])
                continue
            except NameError:
                continue
            ptc, lqjson = parseSquad(tourdict)
            startelodict, endelodict = getEloDict(ifd)
            save_obj({'wikitext': lqjson, 'startelodict': startelodict, 'endelodict': endelodict}, re.sub(r'\W+', '', ifd['title']).lower() )
            df = compareAndPut(df, ptc, ifd, startelodict, endelodict)
    except:
        continue

Succesfully Loaded: WCG Asian Championship 2012
Succesfully Loaded: RaidCall Dota 2 League Season 1
Succesfully Loaded: StarLadder StarSeries Season 3
Succesfully Loaded: GosuLeague Season 4
Succesfully Loaded: GosuLeague Season 4 Division 1
Succesfully Loaded: The Premier League Season 3
Succesfully Loaded: G-1 Champions League Season 4
Succesfully Loaded: Electronic Sports World Cup 2012
Succesfully Loaded: GosuLeague Season 5
Succesfully Loaded: GosuLeague Season 5 Division 1
Succesfully Loaded: StarLadder StarSeries Season 4
Succesfully Loaded: DreamHack Winter 2012
Succesfully Loaded: The Defense Season 3
Succesfully Loaded: World Cyber Games 2012
Succesfully Loaded: Thor Open 2012
Succesfully Loaded: RaidCall Dota 2 League Season 2
Succesfully Loaded: ASUS Open 2012 Finals
Succesfully Loaded: The Asia 2012
Succesfully Loaded: G-League 2012 Season 2
Succesfully Loaded: The Premier League Season 4
Succesfully Loaded: StarLadder StarSeries Season 5
Succesfully Loaded: Armaggeddon Do

Succesfully Loaded: World Cyber Arena 2015 - SEA Pro Qualifiers
Succesfully Loaded: MLG World Finals 2015
Succesfully Loaded: ECL 2015 Autumn
Succesfully Loaded: Nanyang Dota 2 Championships Season 1
Succesfully Loaded: PGL Dota 2 Pro-AM
Succesfully Loaded: Dota2 Battle Arena
Succesfully Loaded: Frankfurt Major 2015
Succesfully Loaded: Dota 2 Canada Cup Season 6
Succesfully Loaded: Game Show Global eSports Cup Season 1
Succesfully Loaded: Dota 2 Champions League Christmas Charity Magic
Succesfully Loaded: The Summit 4
Succesfully Loaded: World Cyber Arena 2015
Succesfully Loaded: Dota 2 Radiant & Dire Cup 2015
Succesfully Loaded: Shanghai Dota 2 Open #1
Succesfully Loaded: SL i-League StarSeries Season 1
Succesfully Loaded: Captains Draft 3.0
Succesfully Loaded: MarsTV Dota 2 League 2015 Winter
Succesfully Loaded: The Shanghai Major 2016
Succesfully Loaded: Dota Pit League Season 4
Succesfully Loaded: World Cyber Arena 2016 Chinese Qualifiers S1
Succesfully Loaded: Russian e-Sports Cup