In [31]:
from datetime import datetime
import requests
import numpy as np
import pandas as pd
import yaml
import re
from bs4 import BeautifulSoup
import wikitextparser as wtp
from ratelimit import rate_limited
from operator import *

def save_obj(obj, name):
    """Pickle `obj` to ``obj/<name>.pkl`` relative to the current working directory.

    Parameters
    ----------
    obj : any picklable object
        The value to serialise.
    name : str
        File stem; it may contain sub-directories (e.g. ``'dict/foo'``).

    The target directory is created when it does not exist yet, so the call
    no longer fails with FileNotFoundError on a fresh working directory.
    """
    import os
    import pickle
    path = 'obj/' + name + '.pkl'
    # Create obj/ and any sub-directory implied by `name` before opening the file.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
def load_obj(name):
    """Return the object stored in ``obj/<name>.pkl`` (relative to the working directory).

    Raises whatever ``open`` / ``pickle.load`` raise when the file is missing
    or is not a valid pickle.
    """
    import pickle
    pkl_path = 'obj/' + name + '.pkl'
    # Caution: unpickling can execute arbitrary code; only load files this notebook produced.
    with open(pkl_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

def easyText(t):
    """Turn any value into a lowercase lookup key.

    The value is converted with ``str()``, every run of non-word characters
    (anything other than letters, digits and underscore) is removed, and the
    result is lower-cased.
    """
    as_text = str(t)
    stripped = re.sub(r'\W+', '', as_text)
    return stripped.lower()

In [32]:
# Semantic "ask" query against the Liquipedia Dota 2 API: pages with a
# 'Has age' value > 0, printing id / role / birth day / name / nationality,
# sorted by 'Has id' and capped at 500 results.
# NOTE(review): limit=500 caps the result set and no offset/continuation
# handling is visible here — confirm that 500 rows is the intended scope.
r = requests.get(url='http://liquipedia.net/dota2/api.php?action=ask&query=[[%3A%2B]]+[[Has+age%3A%3A>0]]|%3FHas+id|%3FHas+role|%3FHas+birth+day|%3FHas+name|%3FHas+nationality|sort%3DHas+id|limit%3D500&format=json', timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
r.raise_for_status()
json2 = r.json()

In [35]:
# Start from an empty table that only declares the four player columns.
df = pd.DataFrame(
    columns=['cog', 'name', 'id', 'dob'],
)

In [36]:
# Build one record per result page and create the frame in a single step.
# Row-by-row DataFrame.append is quadratic and was removed in pandas 2.0;
# rebuilding `df` from the records also makes this cell safe to re-run
# (the previous version appended the same rows again on every run).
records = []
for ldata in json2.get('query').get('results').values():
    po = ldata.get('printouts')
    # Each printout is a list; an empty or missing list means the property
    # is not set for this page, in which case the 'Nan' placeholder is kept.
    nationality = po.get('Has nationality') or []
    birthday = po.get('Has birth day') or []
    realname = po.get('Has name') or []
    records.append({
        'cog': nationality[0] if nationality else 'Nan',
        'name': realname[0] if realname else 'Nan',
        'id': ldata.get('fulltext'),
        # 'timestamp' is interpreted as seconds (unit='s'); only the date part is stored.
        'dob': pd.to_datetime(birthday[0]['timestamp'], unit='s').date() if birthday else 'Nan',
    })
df = pd.DataFrame(records, columns=['cog', 'name', 'id', 'dob'])

In [38]:
# Write the player table to dob.csv (UTF-8, without the index column).
df.to_csv('dob.csv', index=False, encoding='utf-8')

In [14]:
# Page title ('fulltext') of every result returned by the ask query, in result order.
titlelist = [
    entry.get('fulltext')
    for entry in json2.get('query').get('results').values()
]


In [24]:
# Merge both title collections into one de-duplicated set.
# NOTE(review): `titlelistall` is not defined in any cell visible in this
# notebook — it must come from earlier kernel state; confirm its source.
union = set(titlelist) | set(titlelistall)

In [25]:
# Display the merged set of page titles.
union

{'.Ark',
 '13abyKnight',
 '1437',
 '290',
 '2GD',
 '33',
 '343',
 '4DR',
 '5400',
 '7ckngMad',
 '820',
 '86',
 '897',
 '9pasha',
 'ADTR',
 'ALOHADANCE',
 'ALWAYSWANNAFLY',
 'ARS-ART',
 'AabBAA',
 'Ab1ng',
 'Abed',
 'Accel',
 'Ace',
 'AdmiralBulldog',
 'Admiration',
 'Adong',
 'Afoninje',
 'AfrOmoush',
 'AfterLife',
 'Agressif',
 'Ahfu',
 'Ahjit',
 'Air',
 'Akke',
 'Alex',
 'Ame',
 'Ana',
 'AnneeDroid',
 'Another',
 'Apemother',
 'Arise',
 'Ariunbolor',
 'Armel',
 'Arms',
 'ArsZeeqq',
 'Arteezy',
 'Artes',
 'Artstyle',
 'Attacker',
 'Atun',
 'Atze',
 'Aui 2000',
 'Aville',
 'Awoke',
 'Ax.Mo',
 'Axx',
 'Axypa',
 'Ayesee',
 'Ayo',
 'B',
 'BLink',
 'BOne7',
 'BYB',
 'Baga',
 'Balsam',
 'Banana',
 'BananaSlamJamma',
 'Barash',
 'Bashruk',
 'Basskip',
 'Bdiz',
 'Beesa',
 'Ben',
 'Benhur',
 'Benjaz',
 'Bignum',
 'Bimbo',
 'Biryu',
 'Biver',
 'Black^',
 'Blazemon',
 'Blitz',
 'Blizzy',
 'Bloody Nine',
 'Blowyourbrain',
 'BoBoKa',
 'Bok',
 'Boombacs',
 'Boombell',
 'Boris',
 'Boxi',
 'Brax',
 '

In [9]:
@rate_limited(1, 2)
def requestQuery(title):
    """Fetch the redirect information for one Liquipedia Dota 2 page.

    Calls the MediaWiki API (``action=query``, ``prop=redirects``) for
    `title` and returns the decoded JSON response. Calls are throttled by
    the ``rate_limited(1, 2)`` decorator.

    Parameters
    ----------
    title : str
        Page title to look up.

    Returns
    -------
    dict
        The parsed JSON body of the API response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    headers = {
        'User-Agent': 'Data for Research',
        'From': 'terthasarit@live.com',
        'Accept-Encoding': 'gzip'
    }
    # Pass the query through `params` so requests URL-encodes the title;
    # plain string concatenation breaks on titles containing '&', '+' or '#'.
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'redirects',
        'titles': title,
    }
    r = requests.get(url='http://liquipedia.net/dota2/api.php', params=params, headers=headers, timeout=30)
    # Surface HTTP errors here instead of failing later while decoding JSON.
    r.raise_for_status()
    return r.json()

In [30]:
# Persist the id mapping; with save_obj's path rule this writes to
# 'obj//dict/AlternateIdDict.pkl'.
# NOTE: this cell carries execution count 30, i.e. it was run after the cells
# numbered 27-29 that appear below it. On a top-to-bottom run AlternateIdDict
# has not been built yet at this point.
save_obj(AlternateIdDict, '/dict/AlternateIdDict')

In [27]:
# Mapping from a normalised id or alias to the normalised main id; starts empty.
AlternateIdDict = dict()

In [28]:
%%time
# For every page title, map the normalised main id to itself and every
# normalised redirect title to that main id.
for title in union:
    json = requestQuery(title)
    for ldata in json.get('query').get('pages').values():
        mainid = easyText(ldata['title'])
        # A page's own id always resolves to itself, whether or not it has redirects.
        AlternateIdDict[mainid] = mainid
        # Pages without redirects simply have no 'redirects' key.
        for altid in ldata.get('redirects', []):
            alias = easyText(altid['title'])
            # Do not let a redirect clobber another page's canonical
            # self-mapping; otherwise the result depends on the arbitrary
            # iteration order of the `union` set.
            if AlternateIdDict.get(alias) != alias:
                AlternateIdDict[alias] = mainid

Wall time: 21min 39s


In [29]:
# Display the alias -> main id mapping.
AlternateIdDict

{'123456': 'xiao2lei',
 '13abyknight': '13abyknight',
 '1437': '1437',
 '290': '290',
 '2gd': '2gd',
 '2lei': 'xiao2lei',
 '33': '33',
 '333': 'liekkas',
 '343': '343',
 '357': 'qqq',
 '430': 'ferrari430',
 '4dr': '4dr',
 '5400': '5400',
 '633': 'bzzisperfect',
 '716': 'owa',
 '747': 'ryoya',
 '7ckngmad': '7ckngmad',
 '820': '820',
 '86': '86',
 '876': 'mksos',
 '897': '897',
 '9pasha': '9pasha',
 '9pashaebashu': '9pasha',
 'aaabbbaaa': 'aabbaa',
 'aabbaa': 'aabbaa',
 'ab1ng': 'ab1ng',
 'abed': 'abed',
 'accel': 'accel',
 'ace': 'ace',
 'aczino': 'scofield',
 'adam': '343',
 'admiralbulldog': 'admiralbulldog',
 'admiration': 'admiration',
 'adong': 'adong',
 'adtr': 'adtr',
 'afoninje': 'afoninje',
 'afromoush': 'afromoush',
 'afterlife': 'afterlife',
 'agressif': 'agressif',
 'ahfu': 'ahfu',
 'ahjit': 'ahjit',
 'air': 'air',
 'airmangataosenai': 'illidan',
 'akke': 'akke',
 'alex': 'alex',
 'alohadance': 'alohadance',
 'alwayswannafly': 'alwayswannafly',
 'ame': 'ame',
 'ana': 'ana',


In [None]:
 # Look up the raw result entry for the page titled 'N0tail'.
 # NOTE(review): the only visible binding of `json` is the requestQuery
 # response, which is read through 'pages', not 'results'; the ask-API
 # response that has 'results' is `json2` — confirm which object is meant.
 json.get('query').get('results')['N0tail']

In [None]:
# Number of rows currently in df.
len(df)

In [None]:
def AlternateIdDict(AlternateIdDict, json):
    """Add the ids and redirect aliases found in one API response to a mapping.

    Parameters
    ----------
    AlternateIdDict : dict
        Mapping of normalised id/alias -> normalised main id. It is updated
        in place and also returned.
    json : dict
        Decoded API response; pages are read from ``json['query']['pages']``,
        each with a 'title' and an optional 'redirects' list of
        ``{'title': ...}`` entries.

    Returns
    -------
    dict
        The same mapping object that was passed in.

    NOTE(review): defining this function rebinds the global name
    ``AlternateIdDict``, which other cells of this notebook use for the
    mapping dict itself; running this cell replaces that dict with the
    function.
    """
    for ldata in json.get('query').get('pages').values():
        mainid = easyText(ldata['title'])
        # A page's own id always resolves to itself, whether or not it has redirects.
        AlternateIdDict[mainid] = mainid
        # Pages without redirects simply have no 'redirects' key.
        for altid in ldata.get('redirects', []):
            alias = easyText(altid['title'])
            # Never let a redirect overwrite an entry that is already some
            # page's canonical self-mapping; this keeps the result independent
            # of the order in which responses are processed.
            if AlternateIdDict.get(alias) != alias:
                AlternateIdDict[alias] = mainid
    return AlternateIdDict

In [None]:
# Seed the normalised-id column with a copy of the raw 'id' column.
df['idlow'] = df['id']

In [None]:
# Strip every run of non-word characters and lower-case the ids (the same
# normalisation easyText applies to a single string).
# regex=True is required: on pandas >= 2.0 str.replace treats the pattern as a
# literal string by default, so r'\W+' would otherwise remove nothing.
df['idlow'] = df['idlow'].str.replace(r'\W+', '', regex=True).str.lower()

In [None]:
# Show the rows whose 'player' value is 'kpii'.
# NOTE(review): no visible cell creates a 'player' column on df (the visible
# columns are cog, name, id, dob and idlow) — confirm the intended column.
df[df['player'] == 'kpii']

In [None]:
# Write the table, including the idlow column if it was added, to idanddob.csv without the index column.
df.to_csv('idanddob.csv', index=False)