In [2]:
from datetime import datetime
import requests
import numpy as np
import pandas as pd
import yaml
import re
from bs4 import BeautifulSoup
import wikitextparser as wtp
from ratelimit import rate_limited
from operator import *

def save_obj(obj, name ):
    """Pickle ``obj`` to ``obj/<name>.pkl``, creating missing folders first.

    Parameters
    ----------
    obj : any picklable object
        The value to store.
    name : str
        File stem relative to the ``obj/`` folder. It may contain
        sub-folders (e.g. ``'forelo/sometournament'``).

    Raises
    ------
    TypeError
        If ``name`` is not a string.
    """
    import os
    import pickle
    # Plain concatenation is deliberate: this notebook passes names with a
    # leading slash (e.g. '/dict/touranddate') to the sibling loader, and
    # os.path.join would treat those as absolute and drop the 'obj/' prefix.
    path = 'obj/' + name + '.pkl'
    # Without this, a fresh checkout (no obj/ tree yet) fails with
    # FileNotFoundError on the open() below.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
def load_obj(name ):
    """Read back the object pickled at ``obj/<name>.pkl``.

    ``name`` is the file stem relative to the ``obj/`` folder.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    you created yourself.
    """
    import pickle
    pickle_path = 'obj/' + name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        stored = pickle.load(handle)
    return stored

In [3]:
def parseSquad(tourdict):
    '''
    Extract the team rosters from a tournament's cached wikitext.

    input: tournament record (dict); the page text is read from
           tourdict['wikitext']['parse']['wikitext']['*']
    output: dictionary keyed by normalised team name; each value maps
            'pos1'..'pos5' to a normalised player id, with ' haslink'
            appended when the matching '|pNlink' argument is present
    '''
    def normalise(text):
        # Strip every run of non-word characters, then lower-case.
        return re.sub(r'\W+', '', text).lower()

    payload = tourdict['wikitext']
    wikitext = payload.get('parse').get('wikitext').get('*')
    wikitext = re.sub('\n', '', wikitext)

    squad = {}
    for template in wtp.parse(wikitext).templates:
        # Only {{TeamCard|...}} templates describe a roster.
        if not str(template).startswith('{{TeamCard|'):
            continue
        for argument in template.arguments:
            raw = str(argument)
            if raw.startswith('|team'):
                teamname = normalise(argument.value)
                squad[teamname] = {}
            for slot in ('1', '2', '3', '4', '5'):
                position = 'pos' + slot
                if raw[:4] == '|p' + slot + '=':
                    player = normalise(argument.value)
                    # Swap in the alternate id when one is registered.
                    try:
                        player = AlternateIdDict[player]
                    except KeyError:
                        pass
                    squad[teamname][position] = normalise(player)
                if raw[:7] == '|p' + slot + 'link':
                    squad[teamname][position] = squad[teamname][position] + ' haslink'
    return squad

In [None]:
    # NOTE(review): this cell has no execution count and repeats the opening
    # statements of parseSquad at function-body indentation. It reads
    # `tourdict`, which is only assigned in a later cell, so it looks like
    # leftover scratch code -- confirm and remove.
    json = tourdict['wikitext']
    wiki = json.get('parse').get('wikitext').get('*')
    wiki = re.sub('\n', '', wiki)
    parsed = wtp.parse(wiki)
    templates = parsed.templates
    squad = {}


In [3]:
def getEloDict(tourdict):
    """Return the ``(start, end)`` rating entries stored in a tournament record.

    Reads the 'startelodict' and 'endelodict' keys of ``tourdict`` and hands
    them back unchanged as a 2-tuple.
    """
    start_ratings = tourdict['startelodict']
    end_ratings = tourdict['endelodict']
    return start_ratings, end_ratings

In [4]:
def _rating_columns(prefix, rating):
    """Build the four rating columns for one side ('start' or 'end') of a row.

    ``rating`` is one entry of the start/end rating list, or None when the
    team has no rating, in which case every column is NaN.
    """
    if rating is None:
        return {
            prefix + 'elo': np.nan,
            prefix + 'glicko2mu': np.nan,
            prefix + 'glicko2phi': np.nan,
            prefix + 'glicko2rating': np.nan,
        }
    return {
        prefix + 'elo': rating['elo64']['current'],
        prefix + 'glicko2mu': rating['glicko2']['mu'],
        prefix + 'glicko2phi': rating['glicko2']['phi'],
        prefix + 'glicko2rating': rating['glicko2']['rating'],
    }


def compareAndPut(df, squad, ifd, startelodict, endelodict, team_aliases=None):
    """Return ``df`` extended with one row per player of every team in ``squad``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table collected so far. It is not modified; a new frame is returned
        (or ``df`` itself when there is nothing to add).
    squad : dict
        ``{team key: {position: player id}}`` as produced by ``parseSquad``.
    ifd : dict
        Tournament info; the keys 'title', 'prize', 'start' and 'end' are read.
    startelodict, endelodict : iterable of dict
        Rating entries. Each needs 'teamName', ``['elo64']['current']`` and
        ``['glicko2']['mu' | 'phi' | 'rating']``.
    team_aliases : mapping, optional
        Maps a squad team key to the name used in the rating entries.
        Defaults to the notebook-level ``comparedict``.

    Notes
    -----
    * Team names from the rating entries are compared after removing
      non-word characters and lower-casing.
    * A team found in both rating lists yields one row per player for every
      matching (start, end) pair; rows with an empty player id are kept there.
    * A team missing from either list yields NaN ratings, and rows with an
      empty player id are skipped.
    """
    if team_aliases is None:
        # Backward-compatible default: the alias table loaded by the notebook.
        team_aliases = comparedict

    def _index_by_team(ratings):
        # Normalise each rating's team name once instead of once per
        # (squad team, start entry, end entry) combination.
        lookup = {}
        for rating in ratings:
            key = re.sub(r'\W+', '', rating['teamName']).lower()
            lookup.setdefault(key, []).append(rating)
        return lookup

    start_by_team = _index_by_team(startelodict)
    end_by_team = _index_by_team(endelodict)

    def _row(team_name, pos, player_id, start_rating, end_rating):
        row = {
            'team': team_name,
            'pos': pos,
            'id': player_id,
            'tour': ifd['title'],
            'prizeusd': ifd['prize'],
            'start': ifd['start'],
            'end': ifd['end'],
        }
        row.update(_rating_columns('start', start_rating))
        row.update(_rating_columns('end', end_rating))
        return row

    # Collect plain dicts and build the frame once; appending row by row is
    # quadratic and DataFrame.append no longer exists in pandas >= 2.0.
    records = []
    for squad_key, players in squad.items():
        try:
            team_name = team_aliases[squad_key]
        except KeyError:
            team_name = squad_key

        start_matches = start_by_team.get(team_name, [])
        end_matches = end_by_team.get(team_name, [])
        if start_matches and end_matches:
            for start_rating in start_matches:
                for end_rating in end_matches:
                    for pos, player_id in players.items():
                        records.append(_row(team_name, pos, player_id, start_rating, end_rating))
        else:
            for pos, player_id in players.items():
                if player_id == '':
                    continue
                records.append(_row(team_name, pos, player_id, None, None))

    if not records:
        return df

    new_rows = pd.DataFrame(records)
    if len(df) == 0:
        # Keep the caller's column order without concatenating an empty frame.
        columns = list(df.columns) + [c for c in new_rows.columns if c not in df.columns]
        return new_rows.reindex(columns=columns)
    return pd.concat([df, new_rows], ignore_index=True)

In [5]:
# Lookup tables pickled earlier. load_obj prepends 'obj/' and appends '.pkl',
# so these resolve to 'obj//dict/<name>.pkl'.
# alltour: tournament page title -> info dict with 'title', 'prize', 'start', 'end'.
alltour = load_obj('/dict/touranddate')
# comparedict: squad team key -> team name to look up in the rating lists
# (read by compareAndPut).
comparedict = load_obj('/dict/lqtodat')
# AlternateIdDict: normalised player id -> id to use instead (read by parseSquad).
AlternateIdDict = load_obj('/dict/AlternateIdDict')

In [6]:
# Bounds of the tournament window used by the collection loop: a tournament is
# kept when it starts after `endti2` and ends on or before `endti7`.
# NOTE(review): presumably the end dates of The International 2 and 7 -- confirm.
endti2 = '2012-09-02'
endti7 = '2017-08-12'

In [7]:
# Inspect the tournament index (the saved output below is truncated).
alltour

{'2013 WPC ACE Dota 2 League': {'end': '01/01/2014',
  'prize': 247920,
  'start': '22/09/2013',
  'title': '2013 WPC ACE Dota 2 League'},
 '2015 Red Bull Battle Grounds: Dota 2': {'end': '10/05/2015',
  'prize': 88290,
  'start': '06/04/2015',
  'title': '2015 Red Bull Battle Grounds: Dota 2'},
 'AMD Premier League/Season 1': {'end': '15/05/2013',
  'prize': 5000,
  'start': '06/05/2013',
  'title': 'AMD Premier League Season 1'},
 'AMD Premier League/Season 2': {'end': '23/07/2013',
  'prize': 5000,
  'start': '17/07/2013',
  'title': 'AMD Premier League Season 2'},
 'ASUS Open 2012': {'end': '16/12/2012',
  'prize': 15000,
  'start': '13/12/2012',
  'title': 'ASUS Open 2012 Finals'},
 'Adrenaline Cyber League/2017': {'end': '22/11/2017',
  'prize': 100000,
  'start': '21/11/2017',
  'title': 'Adrenaline Cyber League'},
 'Alienware 2012 Battlegrounds': {'end': '25/09/2012',
  'prize': 3000,
  'start': '01/08/2012',
  'title': 'Alienware 2012 Battlegrounds'},
 'Alienware Cup': {'end':

In [9]:
# Load one cached tournament record as a sample; the file stem is the
# lower-cased title with every non-word character removed.
tourdict = load_obj('forelo/' + re.sub(r'\W+', '', '2013 WPC ACE Dota 2 League'.lower() ))

In [10]:
# Inspect the sample record (the saved output below is truncated).
tourdict

{'endelodict': [{'elo32': {'current': 967.6424010159426,
    'sevenDayAgo': 966.9395984591334,
    'sevenDayAvg': 967.1824675368268,
    'thirtyDayAgo': 982.6979187144788,
    'thirtyDayAvg': 974.6809589621979},
   'elo64': {'current': 984.2741322992424,
    'sevenDayAgo': 986.6972677396512,
    'sevenDayAvg': 985.0802727551948,
    'thirtyDayAgo': 1009.4437392270679,
    'thirtyDayAvg': 1002.8138460005679},
   'eloRatingDate': 1388527200000,
   'glicko': {'mu': 1705.536639884528,
    'phi': None,
    'rating': 1593.604613169195,
    'ratingSevenDaysAgo': 1596.183668269253,
    'sigma': 44.772810686133205},
   'glicko2': {'mu': 1658.1122991890452,
    'phi': 41.887996933321304,
    'rating': 1553.392306855742,
    'ratingSevenDaysAgo': 1560.8083888166195,
    'sigma': None},
   'glickoRatingDate': 1388354400000,
   'lossesLastMonth': None,
   'teamName': 'mousesports',
   'valveId': 26,
   'winsLastMonth': None},
  {'elo32': {'current': 943.4549364571801,
    'sevenDayAgo': 940.6647696

In [7]:
%%time
# Tournament keys whose processing failed (missing cache file, unexpected
# wikitext layout, ...). Collected instead of aborting the whole run.
errortitle = []
df = pd.DataFrame(columns=['team', 'pos', 'id', 'tour', 'prizeusd', 'start', 'end',
                           'startelo', 'startglicko2mu', 'startglicko2phi', 'startglicko2rating', 'endelo', 'endglicko2mu', 'endglicko2phi', 'endglicko2rating'])

# Parse the window bounds once rather than on every iteration.
window_start = pd.to_datetime(endti2)
window_end = pd.to_datetime(endti7)

for turl, ifd in alltour.items():
    try:
        tour_start = pd.to_datetime(ifd['start'], format='%d/%m/%Y')
        tour_end = pd.to_datetime(ifd['end'], format='%d/%m/%Y')
        # Keep tournaments that start after the lower bound and finish on or
        # before the upper bound.
        if not (tour_start > window_start and tour_end <= window_end):
            continue
        tourdict = load_obj('forelo/' + re.sub(r'\W+', '', ifd['title']).lower() )
        ptc = parseSquad(tourdict)
        startelodict, endelodict = getEloDict(tourdict)
        df = compareAndPut(df, ptc, ifd, startelodict, endelodict)
    except Exception as exc:
        # Best effort: report and remember the failure, then carry on.
        # list.append returns None, so its result must not be assigned back.
        print(f'{turl}: {exc!r}')
        errortitle.append(turl)

Wall time: 1min 38s


In [8]:
#Clean Data
# Drop rows without a player id. The explicit copy makes the assignments
# below act on an independent frame rather than a filtered slice
# (avoids SettingWithCopyWarning).
df = df[df['id'] != ''].copy()
# Hand-picked id corrections.
df.loc[(df['id'] == 'g haslink'), 'id'] = 'g'
df.loc[(df['id'] == 'noone haslink'), 'id'] = 'noone'
df.loc[(df['id'].str[:8] == 'paparazi'), 'id'] = 'paparazi'
# 'pos1'..'pos5' -> 1..5. Going through astype(str) keeps the cell re-runnable
# once the column is already numeric.
df['pos'] = pd.to_numeric(df['pos'].astype(str).str[-1])

In [9]:
# Write the Elo columns of the table to elo.csv (no index column, UTF-8).
(
    df
    .loc[:, ['team', 'pos', 'id', 'tour', 'prizeusd', 'start', 'startelo', 'end', 'endelo']]
    .to_csv('elo.csv', index=False, encoding='utf-8')
)
#df[['team', 'pos', 'id', 'tour', 'prizeusd', 'start', 'startglicko2mu', 'startglicko2phi', 'startglicko2rating', 'end', 'endglicko2mu', 'endglicko2phi', 'endglicko2rating']].to_csv('glicko.csv', encoding='utf-8', index=False)