In [1]:
import json
import yaml
import dotabet
from dotabet.fetch import fetch_data
import datetime

# URL template for one match on the OpenDota API; `.format(match_id)` fills it in.
endpoint_template = 'https://api.opendota.com/api/matches/{}'
# Metadata file path
meta_file = '../data/1pro_games_meta.yaml'

# Top-level match fields that get_filtered_data copies into a stored record.
# 'players' is special-cased there: each player dict is filtered separately.
keys2keep = [
    'dire_captain', 'dire_name', 'dire_team_id', 'game_mode', 'leagueid',
    'start_time', 'lobby_type', 'metadata', 'patch', 'picks_bans', 'players',
    'radiant_captain', 'radiant_gold_adv', 'radiant_name', 'radiant_score',
    'radiant_team_id', 'radiant_win', 'radiant_xp_adv', 'tower_status_dire',
    'tower_status_radiant', 'version',
]

# Per-player fields kept from each entry of the match's 'players' list.
players_keys2keep = [
    'account_id', 'actions_per_min', 'ancient_kills', 'assists', 'benchmarks',
    'buyback_count', 'camps_stacked', 'cluster', 'creeps_stacked', 'deaths',
    'denies', 'firstblood_claimed', 'gold_per_min', 'gold_t', 'hero_damage',
    'hero_healing', 'hero_id', 'hero_kills', 'kda', 'kill_streaks', 'kills',
    'kills_per_min', 'lane', 'lane_efficiency', 'lane_efficiency_pct',
    'lane_kills', 'lane_role', 'last_hits', 'lh_t', 'life_state_dead', 'lose',
    'multi_kills', 'net_worth', 'neutral_kills', 'observers_placed',
    'observer_kills', 'patch', 'pings', 'radiant_win', 'rank_tier',
    'roshan_kills', 'rune_pickups', 'sentry_kills', 'sen_placed',
    'teamfight_participation', 'total_gold', 'total_xp', 'tower_damage',
    'tower_kills', 'win', 'xp_per_min', 'xp_t',
]

def get_filtered_data(fetched_data, match_id, keys=None, player_keys=None):
    """Reduce a fetched match payload to the fields that are stored.

    Args:
        fetched_data: Mapping holding the match payload.
        match_id: Stored under 'match_id' as the first entry of the result.
        keys: Top-level fields to keep. Defaults to the module-level
            ``keys2keep``.
        player_keys: Fields to keep from each player dict. Defaults to the
            module-level ``players_keys2keep``.

    Returns:
        A new dict with 'match_id' followed by one entry per name in ``keys``.
        Top-level fields absent from ``fetched_data`` are stored as ``None``.
        If 'players' is among ``keys``, it maps to a list of per-player dicts
        holding only the ``player_keys`` present on that player; a missing or
        ``None`` 'players' entry yields an empty list.
    """
    if keys is None:
        keys = keys2keep
    if player_keys is None:
        player_keys = players_keys2keep

    filtered_data = {'match_id': match_id}
    for key in keys:
        if key == 'players':
            # Tolerate a payload without players, as .get() already does for
            # every other field, so one odd match cannot abort a long fetch run.
            players = fetched_data.get('players') or []
            filtered_data['players'] = [
                {field: player[field] for field in player_keys if field in player}
                for player in players
            ]
        else:
            filtered_data[key] = fetched_data.get(key)
    return filtered_data

In [2]:
# CUSTOMIZE: choose what this run does.
#   new   : fetch new data and update 1pro_games_new.json
#   old   : continue to fetch old data and update 1pro_games.json
#   check : check whether any matches between the max and min match_id are missing
mode = 'new'

# Validate right where the value is chosen, so a typo is reported here with a
# clear message instead of surfacing as a confusing failure further down.
VALID_MODES = ('new', 'old', 'check')
if mode not in VALID_MODES:
    raise ValueError(f"Unknown mode {mode!r}; expected one of {VALID_MODES}")

In [3]:
# Select the output file and load the matches that are already stored.
if mode == 'new':
    output_file = '../data/1pro_games_new.json'
    data = dotabet.utils.load_tmp_file(output_file)
elif mode == 'old':
    output_file = '../data/1pro_games.json'
    # NOTE(review): get_merged_data is given a single path string here but a
    # list of paths in 'check' mode - confirm the helper accepts both.
    data = dotabet.utils.get_merged_data(output_file)
elif mode == 'check':
    output_file = '../data/1pro_games_new.json'
    file_paths = ['../data/1pro_games_new.json', '../data/1pro_games.json']
    data = dotabet.utils.get_merged_data(file_paths)
else:
    # Fail before `data` is used; silently passing here would only produce a
    # NameError on the next statement.
    raise ValueError(f"Unknown mode {mode!r}; expected 'new', 'old' or 'check'")

data_mids = [m['match_id'] for m in data]
if not data_mids:
    # max()/min() below need at least one stored match to derive a boundary.
    raise ValueError(f"No stored matches were loaded for mode {mode!r}; cannot derive a match-id boundary")


def _utc_datetime(match_id):
    """Return (start_time, UTC datetime) of the stored match with this id."""
    start_time = data[data_mids.index(match_id)]['start_time']
    # fromtimestamp(..., utc) replaces the deprecated utcfromtimestamp; the
    # result is timezone-aware but formats to the same text.
    return start_time, datetime.datetime.fromtimestamp(start_time, datetime.timezone.utc)


# start_match_id: where the proMatches listing starts (None = most recent).
# stop_match_id : fetching stops once a listed match id is <= this value.
if mode == 'new':
    start_match_id = None
    stop_match_id = max(data_mids)
    recent_start_time, recent_date = _utc_datetime(stop_match_id)
    print(f"Ready to fetch new. Most recent id: {stop_match_id} ({recent_date.strftime('%d %B %Y %H:%M')})")
elif mode == 'old':
    start_match_id = min(data_mids)
    stop_match_id = 0
    old_start_time, old_date = _utc_datetime(start_match_id)
    print(f"Continue fetch old data from {start_match_id} ({old_date.strftime('%d %B %Y %H:%M')})")
elif mode == 'check':
    start_match_id = None
    stop_match_id = min(data_mids)
    old_start_time, oldest_date = _utc_datetime(stop_match_id)
    print(f"I will check if any match is missing from the most recent up to {stop_match_id} ({oldest_date.strftime('%d %B %Y %H:%M')})")

Ready to fetch new. Most recent id: 7686395200 (14 April 2024 18:50)


In [4]:
# Walk the proMatches listing page by page, fetch every listed match that is
# not stored yet, and append its filtered record to output_file.
# Paging uses less_than_match_id, so the listing is presumably ordered by
# descending match id - TODO confirm against the API.
stop = False
known_mids = set(data_mids)  # O(1) membership test per listed match
if start_match_id:
    endpoint_proMatches = f"https://api.opendota.com/api/proMatches?less_than_match_id={start_match_id}"
else:
    endpoint_proMatches = "https://api.opendota.com/api/proMatches"

while True:
    fetched_proMatches = fetch_data(endpoint_proMatches)
    if not fetched_proMatches:
        # An empty or failed page would otherwise leave `match_id` unset or
        # stale and re-request the same page forever. In 'old' mode
        # (stop_match_id = 0) this is also the only way the walk can end.
        print("No more pro matches returned. Ending loop.")
        break

    fetched_mids = [m['match_id'] for m in fetched_proMatches]
    for i, match_id in enumerate(fetched_mids):
        # Check the boundary before the "already stored" skip: in 'new' mode
        # the boundary id is itself stored, so skipping first would page
        # through every stored match before stopping.
        if match_id <= stop_match_id:
            stop = True
            print(f"Stop! Reached {match_id=}")
            break
        if match_id in known_mids:
            continue

        endpoint = endpoint_template.format(match_id)
        fetched_data = fetch_data(endpoint)
        if not fetched_data:
            # Actually end the run, as the message says, rather than only
            # leaving the inner loop and moving on to the next page.
            stop = True
            print("No new data fetched. Ending loop.")
            break

        if mode == 'check':
            print(f"Catched missing match! {match_id}")
        filtered_data = get_filtered_data(fetched_data, match_id)
        # Append one record followed by a comma; reopening per match means
        # each record is written out before the next request is made.
        with open(output_file, 'a') as file:
            json.dump(filtered_data, file)
            file.write(',')
        print(f"[{i}] {match_id=}✔️", end='')

    if stop:
        break
    # Continue below the last id listed on this page.
    endpoint_proMatches = f"https://api.opendota.com/api/proMatches?less_than_match_id={fetched_mids[-1]}"
    print("Next 💯")

[0] match_id=7688762413✔️[1] match_id=7688681911✔️[2] match_id=7688643820✔️[3] match_id=7688602905✔️[4] match_id=7688597133✔️[5] match_id=7688590357✔️[6] match_id=7688535551✔️[7] match_id=7688509293✔️[8] match_id=7688501462✔️[9] match_id=7688493738✔️[10] match_id=7688491367✔️[11] match_id=7688474328✔️[12] match_id=7688471003✔️[13] match_id=7688461312✔️[14] match_id=7688460624✔️[15] match_id=7688452628✔️[16] match_id=7688433679✔️[17] match_id=7688433609✔️[18] match_id=7688429228✔️[19] match_id=7688426923✔️[20] match_id=7688426697✔️[21] match_id=7688408092✔️[22] match_id=7688395432✔️[23] match_id=7688391111✔️[24] match_id=7688382319✔️[25] match_id=7688374705✔️[26] match_id=7688368536✔️[27] match_id=7688362718✔️[28] match_id=7688338128✔️[29] match_id=7688331628✔️[30] match_id=7688331208✔️[31] match_id=7688330356✔️[32] match_id=7688307349✔️[33] match_id=7688303275✔️[34] match_id=7688301527✔️[35] match_id=7688275166✔️[36] match_id=7688273404✔️[37] match_id=7688271437✔️[38] match_id=76882446