In [2]:
import pandas as pd
import numpy as np
import datetime
import os
import json
import dota2

# The "API_KEY" can be obtained with your account: https://www.opendota.com/api-keys
# Credentials must not live in the notebook: export the key as DOTA2_API_KEY
# before starting Jupyter so it never ends up in version control or shared output.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
api_key = os.environ.get('DOTA2_API_KEY')
if not api_key:
    raise RuntimeError("Set the DOTA2_API_KEY environment variable to your API key before running this notebook.")
dota = dota2.Dota2(api_key)
# Last expression of the cell: displays whether the client accepted the key.
dota.is_valid

True

In [3]:
# Create a function to extract info from the dictionary data of one match

def extractMatchInfo(data):
    '''
    A function used to flatten the data of one match into a single row.
    
    INPUT:
    data - data of one match in dictionary (originally obtained from https://api.opendota.com/api/publicMatches).
           'radiant_team' and 'dire_team' may be either a comma-separated string
           of hero ids ("1,2,3") or an iterable of hero ids ([1, 2, 3]).
    
    OUTPUT:
    arr - a list of match information, in this order: match_id, game_mode,
          start_time (string formatted as 'YYYY-MM-DD HH:MM:SS'), duration,
          lobby_type, the hero ids of the radiant team, the hero ids of the
          dire team, radiant_win
    '''
    
    def _heroIds(team):
        # Accept both encodings of a team so the same code handles a
        # comma-separated string and an already-parsed list of ids.
        if isinstance(team, str):
            team = team.split(",")
        return [int(hero) for hero in team]
    
    arr = []
    
    # Save match_id
    arr.append(data['match_id'])
    
    # Save game_mode. In this exercise, we are looking for matches with game_mode 2
    arr.append(data['game_mode'])
    
    # Save start_time converted to datetime format.
    # fromtimestamp() without a tz argument yields the machine's local time.
    started = datetime.datetime.fromtimestamp(int(data['start_time']))
    arr.append(started.strftime('%Y-%m-%d %H:%M:%S'))
    
    # Save duration
    arr.append(data['duration'])
    
    # Save lobby_type
    arr.append(data['lobby_type'])
    
    # Save picks for both teams: team radiant goes first
    arr.extend(_heroIds(data['radiant_team']))
    arr.extend(_heroIds(data['dire_team']))
    
    # Save result: radiant_win
    arr.append(data['radiant_win'])
    
    return arr

In [4]:
# Get publicMatches data to create a dataframe for later use

PUBLIC_MATCHES_URL = "https://api.opendota.com/api/publicMatches"
N_ROUNDS = 300  # choose whatever number of extra rounds you'd like to have for your dataset


def fetchMatchRows(less_than_match_id=None):
    '''
    Download one page of public matches and convert every match into a row.
    
    INPUT:
    less_than_match_id - optional; when given it is sent as the
                         less_than_match_id query parameter
    
    OUTPUT:
    rows - a list with one extractMatchInfo() result per match. The list is
           empty when the downloaded file is not valid JSON or does not
           contain a JSON list.
    '''
    url = PUBLIC_MATCHES_URL
    if less_than_match_id is not None:
        url += "?less_than_match_id={}".format(less_than_match_id)
    
    # Quote the URL so the shell never treats "?" as a wildcard character.
    os.system('curl "{}" > publicMatches.json'.format(url))
    with open('publicMatches.json') as f:
        try:
            payload = json.load(f)
        except ValueError:
            # Failed download (e.g. empty file): report "no rows" to the caller.
            return []
    
    # Anything other than a list of matches cannot be turned into rows.
    if not isinstance(payload, list):
        return []
    return [extractMatchInfo(m) for m in payload]


# `df` stays a plain list of rows here; it is turned into a DataFrame later.
df = fetchMatchRows()

# For each query, we could obtain data for 100 matches. The following code keeps
# querying, each time asking for matches older than the smallest match_id seen
# so far, to obtain data for more matches.
if df:
    match_id = min(row[0] for row in df)
    for _ in range(N_ROUNDS):
        rows = fetchMatchRows(match_id)
        if not rows:
            # Stop instead of repeating the same request for the remaining rounds.
            print("No more matches returned; stopping with {} rows.".format(len(df)))
            break
        df.extend(rows)
        # Only the new page can lower the minimum, so there is no need to
        # rebuild and sort a DataFrame of everything collected so far.
        match_id = min(match_id, min(row[0] for row in rows))
else:
    print("The first publicMatches request returned no matches.")

In [52]:
# Rename columns and save to a .csv file, such that we could read and load the data for future analysis

MATCH_COLUMNS = {0: "match_id", 1: "game_mode", 2: "start_time", 3: "duration", 4: "lobby_type", 
                 5: "radiant_1", 6: "radiant_2", 7: "radiant_3", 8: "radiant_4", 9: "radiant_5", 
                 10: "dire_1", 11: "dire_2", 12: "dire_3", 13: "dire_4", 14: "dire_5", 15: "radiant_win"}

# drop_duplicates() has to be applied to the frame that is kept: calling it
# with inplace=True on a temporary DataFrame throws the result away.
# Deduplicating on the renamed "match_id" column also keeps this cell
# re-runnable once `df` already is the renamed DataFrame.
df = (
    pd.DataFrame(df)
    .rename(columns=MATCH_COLUMNS)
    .drop_duplicates(subset=["match_id"], keep="last")
    .reset_index(drop=True)
)
df.to_csv('match_df.csv', index=False)

In [53]:
# Hero metadata is available from the same API; store it next to the match data.
heroes_cmd = "curl https://api.opendota.com/api/heroes > heroes.json"
os.system(heroes_cmd)

# Optional extras: team data and hero statistics in recent matches.
teams_cmd = "curl https://api.opendota.com/api/teams > teams.json"
hero_stats_cmd = "curl https://api.opendota.com/api/heroStats > heroStats.json"

os.system(teams_cmd)
# Kept as the final bare expression so the cell still displays the exit status.
os.system(hero_stats_cmd)


0