In [1]:
import time
import json
import requests
import pandas as pd

In [2]:
class OpenDotaAPI():

    def __init__(self, verbose = False):
        self.verbose = verbose

    def _call(self, url, parameters, tries= 2):
        for i in range(tries):
            try:
                if self.verbose: print("Sending API request... ", end="", flush=True)
                resp = requests.get(url, params= parameters, timeout= 20)
                load_resp = json.loads(resp.text)
                if self.verbose: print("done")
                return load_resp
            except Exception as e:
                print("failed. Trying again in 5s")
                print(e)
                time.sleep(5)
        else:
            ValueError("Unable to connect to OpenDota API")

    # Return a list of 100 recent matches; save smaller match_id
    def get_recent_matches(self, last_match_id):
        params = dict()
        params['less_than_match_id'] = last_match_id
        url = "https://api.opendota.com/api/proMatches"
        matches = self._call(url, params)
        return matches

    # Return a dictionary with match information
    def get_match_info(self, match_id):
        url = "https://api.opendota.com/api/matches/" + str(match_id)
        return self._call(url, None)

    # Return a list with player's heroes history
    def get_player_heroes_history(self, account_id):
        url = "https://api.opendota.com/api/players/{}/heroes".format(account_id)
        return self._call(url, None)

    # Return a list with player's heroes ranking history
    def get_player_heroes_ranking(self, account_id):
        url = "https://api.opendota.com/api/players/{}/rankings".format(account_id)
        return self._call(url, None)

In [3]:
class DataPreprocessing():
    def __init__(self):
        # Initialize tables as empty dataframes
        self.matches = pd.DataFrame()

    def get_match(self, match):
        """ Get general information from the match and append to self.matches. """

        fields = ['match_id', 'draft_timings', 'radiant_win', 'radiant_team', 'dire_team']

        proc_match = {key: [match[key]] for key in fields}
        self.matches = self.matches.append(pd.DataFrame(proc_match), ignore_index=True)

    def get_player_heroes(self, hero):
        """ Get general information from the match and append to self.matches. """

        fields = ['hero_id', 'games', 'win','games','win','with_games','with_win','against_games','against_win']

        proc_match = {key: [hero[key]] for key in fields}
        self.matches = self.matches.append(pd.DataFrame(proc_match), ignore_index=True)

    def get_player_rankings(self, hero):
        """ Get general information from the match and append to self.matches. """

        fields = ['hero_id', 'percent_rank']

        proc_match = {key: [hero[key]] for key in fields}
        self.matches = self.matches.append(pd.DataFrame(proc_match), ignore_index=True)

    def get_all_current_match_tables(self, match_details):
        """ Get all tables from a current match, except the previous matches. """
        self.get_match(match_details)

Para obtener todas la información de las partidas

In [None]:
#Cambiar ID Por el Ultimo Obtenido en el Dataset
def main_matches(sleep_time = 2, min_match_id = 6616234032):
    api = OpenDotaAPI(verbose= True)
    data = DataPreprocessing()
    recent_matches = api.get_recent_matches(min_match_id)
    for recent_match in recent_matches:
        time.sleep(sleep_time)
        match_details = api.get_match_info(recent_match['match_id'])
        data.get_all_current_match_tables(match_details)
    return data

Para obtener la información del usuario. Player Heroes

In [4]:
def main_player(sleep_time = 2, account_id = 173869537):
    api = OpenDotaAPI(verbose= True)
    data = DataPreprocessing()
    heroes_history = api.get_player_heroes_history(account_id)
    for heroe in heroes_history:
        time.sleep(sleep_time)
        data.get_player_heroes(heroe)
    return data

Creamos el dataset

In [7]:
df = main_player()
df = df.matches
df.to_csv("Player_heroes.csv", index=False)

Sending API request... done


  self.matches = self.matches.append(pd.DataFrame(proc_match), ignore_index=True)


    hero_id  games  win  with_games  with_win  against_games  against_win
0        69    116   52          48        29             64           35
1        67     80   56          90        54             83           45
2       102     77   51          65        35             78           36
3        51     72   38          56        25             75           43
4        71     70   41         151        89            196           93
..      ...    ...  ...         ...       ...            ...          ...
118     123      0    0          17         9             15            8
119     126      0    0          13         6              5            2
120     128      0    0          16         7              7            5
121     136      0    0          13         5             16            7
122     137      0    0           5         2              8            3

[123 rows x 7 columns]


Para crear en secuencia muchos dataset de 100 partidas cada uno

In [None]:
ultimo_id = 6616234032
contador = 0

for _ in range(10):
    df = main_matches(min_match_id = ultimo_id)
    ultimo_id = df['match_id'].min()
    df.to_csv("Dataset_train_{}.csv".format(contador), index=False)
    contador += 1

Modificar el Dataset de partidas para tener valores 1 y 0 para cada heroe

In [None]:
ori = df
df = ori[["radiant_win","radiant_team","dire_team"]]

x = df["radiant_team"].str.split(r',', expand=True).stack().reset_index(level=1, drop=True).to_frame('radiant_team')
x = pd.get_dummies(x, prefix='', prefix_sep="", columns=['radiant_team']).groupby(level=0).sum().astype("int")
x.columns = x.columns.map(int)
x = x.reindex(sorted(x.columns), axis=1)

y = df["dire_team"].str.split(r',', expand=True).stack().reset_index(level=1, drop=True).to_frame('dire_team')
y = pd.get_dummies(y, prefix='', prefix_sep="", columns=['dire_team']).groupby(level=0).sum().astype("int")
y.columns = y.columns.map(int)
y = y.reindex(sorted(y.columns), axis=1)

hero = x-y
hero["radiant_win"] = df["radiant_win"].astype("int")

Preprocess Player Data. Removing Heroes with Less Than 5 Games Played. Inner Join with Player Heroes Rankings.

In [10]:
heroes = pd.read_csv("Player_heroes.csv")
heroes = heroes.drop(heroes[heroes.games < 6].index)