# Age of Empires webcrawler for RM 1V1 leaderboard

### Iterative connection to the aoe2.net API

A custom crawler is required because the aoe2.net API restricts each download to 10,000 records; this is not enough data to infer meaningful statistics, as there are many more than 10,000 players in the community.


In [1]:
import datetime
import json
import sys
import time
import urllib
import urllib.request

import pandas as pd
import pyodbc as db

In [38]:
# --- API endpoint -----------------------------------------------------------
# Base URL for the leaderboard query; the starting rank is appended per request.
api_str = 'https://aoe2.net/api/leaderboard?game=aoe2de&leaderboard_id=3&count=10000&start='

# --- Crawl position ---------------------------------------------------------
rank_start = 1             # first rank to request
current_rank = rank_start  # advanced by the crawl loop after each page

# --- Error handling ---------------------------------------------------------
max_allowed_invalid = 5    # max allowed consecutive connection issues

# --- Output -----------------------------------------------------------------
# Accumulates every leaderboard record across all pages before saving.
json_total = []

# Destination file for the combined records.
# NOTE(review): 'API Dataleaderboard.json' may be missing a path separator
# ('API Data/leaderboard.json') - confirm the intended location.
save_filename_root = 'C:/Users/richa/OneDrive/Documents/Career/Portfolio/AOE2/API Dataleaderboard.json'


In [39]:
# Iterate through the JSON pages from the API, one request per page, until
# the leaderboard is exhausted or too many consecutive requests fail.
invalid_count = 0  # current consecutive url connection errors

while True:

    # get the url to access the API
    current_api_string = api_str + str(current_rank)

    print('Retrieving ranks {0} and higher from: {1}'.format(current_rank, current_api_string))

    try:
        with urllib.request.urlopen(current_api_string, timeout=30) as url:
            data = json.loads(url.read().decode())

    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit can still stop the crawl.
        invalid_count += 1
        print('Request failed: {0!r}'.format(err))

        if invalid_count > max_allowed_invalid:
            print('Too many consecutive connection failures ({0}). Exiting process'.format(max_allowed_invalid))
            break

        # brief pause before retrying the same starting rank
        time.sleep(1)
        continue

    # if we get this far, then we have a successful connection. Reset invalid counter
    invalid_count = 0

    page = data['leaderboard']

    # An empty page has no last record to read the next rank from; treat it
    # as the end of the leaderboard instead of failing on page[-1].
    if not page:
        print('No more records available. Exiting')
        break

    json_total.extend(page)

    # next request starts just after the last rank received on this page
    current_rank = page[-1]['rank'] + 1

    # a page reporting a count other than 10000 is taken as the final page
    if data['count'] != 10000:
        print('No more records available. Exiting')
        break

        
# Save the accumulated leaderboard records to a single .json file.
with open(save_filename_root, 'w') as outfile:
    # the with-statement closes the file on exit, so no explicit close() is needed
    json.dump(json_total, outfile)

Retrieving ranks 1 and higher from: https://aoe2.net/api/leaderboard?game=aoe2de&leaderboard_id=3&count=10000&start=1
Retrieving ranks 10001 and higher from: https://aoe2.net/api/leaderboard?game=aoe2de&leaderboard_id=3&count=10000&start=10001
Retrieving ranks 20001 and higher from: https://aoe2.net/api/leaderboard?game=aoe2de&leaderboard_id=3&count=10000&start=20001
Retrieving ranks 30001 and higher from: https://aoe2.net/api/leaderboard?game=aoe2de&leaderboard_id=3&count=10000&start=30001
Retrieving ranks 40001 and higher from: https://aoe2.net/api/leaderboard?game=aoe2de&leaderboard_id=3&count=10000&start=40001
No more records available. Exiting
