# All Imports

In [3]:
import os
from dotenv import load_dotenv
import requests
import time
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Semaphore
import threading
from datetime import datetime
import pandas as pd

# Globals

In [4]:
DEFAULT_WR = 0.45
MAX_REQUESTS_PER_SECOND = 20
MAX_REQUESTS_PER_2MINUTE = 100
# Can use later for getting better avg/accounting for other gamemodes
EXCLUDED_QUEUE_IDS = {
    0,    # Custom games
    830,  # Co-op vs. AI: Intro bots
    840,  # Co-op vs. AI: Beginner bots
    850,  # Co-op vs. AI: Intermediate bots
    450,  # ARAM
    900,  # ARURF
    920,  # Nexus Blitz
    1300  # Nexus Blitz (old)
    # Add more queue IDs to exclude other game modes
}
# Ranks used for grabbing data from
SAMPLE_RANKS = {
    # Casual players are usually from silver to platinum
    "SILVER",
    "GOLD",
    "PLATINUM"
}
DIVISIONS = {
    "I",
    "II",
    "III",
    "IV"
}
SAMPLE_SIZE_SCALE = 3 # X for each rank & division (X*3*4*3), (X*num_sample_ranks*divisions*num_for_each_player)
NUM_SAMPLE_FOR_EACH_PLAYER = 3 # Too much will result in data skewed based on particular player performance, Capped at 20
OVERRIDE_AND_CREATE_NEW_DATA = False

# Getting Data

In [5]:
# Set up API (NA1 Region Only)
load_dotenv('.env')
api_key = os.getenv("RIOT_API_KEY")

In [6]:
# Testing key
#print(api_key)

In [7]:
# Custom RateLimiter class
class RateLimiter:
    def __init__(self, max_calls_1, period_1, max_calls_2, period_2):
        # Init for keeping tracking of calls per second (period_1) and 2min (period2)
        self.max_calls_1 = max_calls_1
        self.period_1 = period_1
        self.calls_1 = 0
        self.start_time_1 = time.time()

        self.max_calls_2 = max_calls_2
        self.period_2 = period_2
        self.calls_2 = 0
        self.start_time_2 = time.time()

    def acquire(self):
        # Called everytime right before API is used to time it and ensure no requests more than x per sec and y per 2min
        current_time = time.time()

        # Find time elapsed since first 
        elapsed_1 = current_time - self.start_time_1
        elapsed_2 = current_time - self.start_time_2

        # Reset calls and start time if period passed
        if elapsed_1 > self.period_1:
            self.calls_1 = 0
            self.start_time = current_time

        if elapsed_2 > self.period_2:
            self.calls_2 = 0
            self.start_time_2 = current_time

        # Proceed to sleep or not depending on if max calls per second exceeded
        if self.calls_1 < self.max_calls_1:
            self.calls_1 += 1
        else:
            time_to_wait = self.period_1 - elapsed_1
            if time_to_wait > 0:
                print(f"Rate limit reached for {MAX_REQUESTS_PER_SECOND}/1s. Sleeping for {time_to_wait:.2f} seconds.")
                time.sleep(time_to_wait)
            self.calls_1 = 1
            self.start_time_1 = time.time()

        # Proceed to sleep or not depending on if max calls per 2min exceeded
        if self.calls_2 < self.max_calls_2:
            self.calls_2 += 1
        else:
            time_to_wait = self.period_2 - elapsed_2
            if time_to_wait > 0:
                print(f"Rate limit reached for {MAX_REQUESTS_PER_2MINUTE}/2m. Sleeping for {time_to_wait:.2f} seconds.")
                time.sleep(time_to_wait)
            self.calls_2 = 1
            self.start_time_2 = time.time()

In [8]:
# Function for detecting and handling limit reached (429) and NOT success (200)
def apiCallHandler(request_url, rate_limiter):
    #print(request_url)

    rate_limiter.acquire()

    headers = {
        "X-Riot-Token": api_key 
    }
    
    response = requests.get(request_url, headers=headers)

    numFailedRetries = 0
    while response.status_code != 200: # while loop here for later when we want to ignore error 429
        # Retry limiter
        if(numFailedRetries >= 2):
            sys.exit(f"Exceeded retry limit of {numRetries}")
            
        if(response.status_code == 429):
            # Not success but is 429 API limit error
            print("Status 429 detected")
            retry_after = int(response.headers.get("Retry-After", 0))
            # 429, retry
            print(f"Retrying in {retry_after}")
            time.sleep(retry_after)
            numFailedRetries += 1
            rate_limiter.acquire()
            response = requests.get(request_url, headers=headers)
            
        else:
            # Not success and not 429 API limit error
            print(f"Failed to fetch data: {response.status_code}")
            sys.exit("Stopping all execution")
        
    # (finally) status of 200
    return response.json()

In [9]:
# Function for using multithreading when calling multiple APIs
# urls = a list of urls desired (and compatible) to multithread
# work = optional work function that does work on each value
def multithread_call(urls, work, rate_limiter):
    results = [None] * len(urls)
    future_to_index = {}
    # Threadpoolexecutor ensures max concurrent workers don't exceed
    with ThreadPoolExecutor(max_workers=MAX_REQUESTS_PER_SECOND) as executor:
        # "Future" objects store the future value of the API call, it is mapped to an index
        future_to_index = {executor.submit(apiCallHandler, url, rate_limiter): index for index,url in enumerate(urls)}
        # as_completed takes "Future" objects in the order they complete
        for future in as_completed(future_to_index):
            # Get index which tells the correct order to return as
            index = future_to_index[future]
            try:
                result = work(future.result())
                results[index] = result
            except Exception as e:
                # This shouldn't ever happen, API error checking done in the API handler
                print(f"Odd Exception: {e}")
                sys.exit("Stopping all execution")
    return results

In [10]:
# Function for calculating average win rate of a summoner using multithreading
def avg_wr_summoner_draft(match_history, summoner_puuid, rate_limiter):
    if(len(match_history) == 0):
        return DEFAULT_WR
    # Create the API URLS based on match_history
    urls = []
    for match in match_history:
        urls += [f"https://americas.api.riotgames.com/lol/match/v5/matches/{match}"]
    results = multithread_call(urls, lambda x: x, rate_limiter)
    total_matches = 0
    win_count = 0
    for response in results:
        try:
            # Finds the first participant's id who's equal to the summoner's puuid
            participant = next(p for p in response['info']['participants'] if p['puuid'] == summoner_puuid)
            if(participant['win'] == True):
                win_count+=1
            total_matches+=1
        except StopIteration:
            print("ERROR: StopIteration exception occured, Riot data incorrect?")
            print(response)
            print(f"Summoner PUUID: {summoner_puuid}")
            sys.exit("Stopping all execution")
        except:
            print("ERROR: Exception occured, shouldn't be here")
            sys.exit("Stopping all execution")
    return round(win_count/total_matches, 2)

In [11]:
# Function for calculating average win rate of a summoner 
# Note: ranked statistics are much faster and easier to obtain
def avg_wr_summoner_ranked(match_history_length, stats):
    if(match_history_length == 0):
        return DEFAULT_WR
        
    # Go through each gamemode and look for RANKED_SOLO_5x5
    for stat in stats:
        if(stat['queueType'] == "RANKED_SOLO_5x5"):
            wins = stat['wins']
            loses = stat['losses']
            total = wins+loses
            return round(wins/total, 2)

    # Found no ranked_solo games
    return DEFAULT_WR

In [12]:
# Gets Data for a given summoner PUUID, and their champ played (for champ mastery)
# Data currently equals: Summoner Level, match history length (max 20), AVG win rate for past 20 matches, champ mastery
def get_summoner_features(summoner_puuid, champion_id, rate_limiter):
    # Get summoners' match history & length using their PUUID, filters only for ranked games; max api_calls = 1 (No multithread)
    summoner_match_history = apiCallHandler(f'https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/{summoner_puuid}/ids?queue=420&type=ranked&start=0&count=20', rate_limiter)
    match_history_length = len(summoner_match_history)

    # All calls below can be multithreaded since prev info is not needed
    # urls = [] Future work: have all possible multi threaded calls do one call
    
    # Get summoners' level using their PUUID; max api_calls = 1
    summoner_data = apiCallHandler(f'https://na1.api.riotgames.com/lol/summoner/v4/summoners/by-puuid/{summoner_puuid}', rate_limiter)
    level = summoner_data['summonerLevel']
    summoner_id = summoner_data['id']
    
    # Get summoners' win rate using match history; max_api_calls = 20 
    # Future work: Ensure that no custom, bot, tutorial, or arena/limited game mode matches
    # Future work: If match count is < 20, THEN use quickplay and aram stats (assign weights to them)
    # Future work: add compatibility for predicting specifically gamemodes other than normal draft 
    #avg_wr = avg_wr_summoner_ranked(match_history_length, summoner_id, rate_limiter)

    # Parallize starting here
    urls = []
    # Get summoners' win rate using API
    urls += [f'https://na1.api.riotgames.com/lol/league/v4/entries/by-summoner/{summoner_id}']
    # Get summoners' champ mastery for the match 
    urls += [f'https://na1.api.riotgames.com/lol/champion-mastery/v4/champion-masteries/by-puuid/{summoner_puuid}/by-champion/{champion_id}']

    all_data_response = multithread_call(urls, lambda x: x, rate_limiter)

    # Win rate
    avg_wr = avg_wr_summoner_ranked(match_history_length, all_data_response[0])

    # Champ mastery
    champ_mastery_data = all_data_response[1]
    champ_mastery = champ_mastery_data['championPoints']
    #champ_mastery_data = apiCallHandler(f'https://na1.api.riotgames.com/lol/champion-mastery/v4/champion-masteries/by-puuid/{summoner_puuid}/by-champion/{champion_id}', rate_limiter)
    
    
    return (level, match_history_length, avg_wr, champ_mastery)

In [13]:
# Gets the features given the match ID
def get_features(match_id, rate_limiter):
    
    # Event for signaling all threads to stop
    #stop_event = threading.Event()
    
    match_info = apiCallHandler(f'https://americas.api.riotgames.com/lol/match/v5/matches/{match_id}', rate_limiter)
    
    players = match_info['metadata']['participants']
    
    # Team 1
    avg_lvl_1 = 0
    avg_mhl_1 = 0
    avg_wr_1 = 0
    sum_cm_1 = 0
    
    # Team 2
    avg_lvl_2 = 0
    avg_mhl_2 = 0
    avg_wr_2 = 0
    sum_cm_2 = 0
    
    # 1 means first 5 won, 2 means last 5 won, 3 means draw
    winner = 0
    if(match_info['info']['participants'][0]['win'] == True):
        winner = 1
    elif(match_info['info']['participants'][5]['win'] == True):
        winner = 2
    else:
        # Remake, no one won
        winner = 3
        print("REMADE")
    
    if winner != 3:
        for index in range(len(players)):
            # Get championID of each player
            champion_id = match_info['info']['participants'][index]['championId']
            level, match_history_length, wr, champ_mastery = get_summoner_features(players[index], champion_id, rate_limiter)
            if(index < 5):
                avg_lvl_1 += level
                avg_mhl_1 += match_history_length
                avg_wr_1 += wr
                sum_cm_1 += champ_mastery
            elif(index >= 5):
                avg_lvl_2 += level
                avg_mhl_2 += match_history_length
                avg_wr_2 += wr
                sum_cm_2 += champ_mastery
            else:
                print("Error: Shouldn't be here")
        avg_lvl_1 = round(avg_lvl_1 / 5, 2)
        avg_mhl_1 = round(avg_mhl_1 / 5, 2)
        avg_wr_1 = round(avg_wr_1 / 5, 2)
        
        avg_lvl_2 = round(avg_lvl_2 / 5, 2)
        avg_mhl_2 = round(avg_mhl_2 / 5, 2)
        avg_wr_2 = round(avg_wr_2 / 5, 2)
        
        #print(f"Avg lvl 1: {avg_lvl_1}")
        #print(f"Avg mhl 1: {avg_mhl_1}")
        #print(f"Avg wr 1: {avg_wr_1}")
        #print(f"Sum cm 1: {sum_cm_1}")
        
        #print(f"Avg lvl 2: {avg_lvl_2}")
        #print(f"Avg mhl 2: {avg_mhl_2}")
        #print(f"Avg wr 2: {avg_wr_2}")
        #print(f"Sum cm 2: {sum_cm_2}")
        
    return (match_id, avg_lvl_1, avg_mhl_1, avg_wr_1, sum_cm_1, avg_lvl_2, avg_mhl_2, avg_wr_2, sum_cm_2, winner)

In [14]:
# Converts a given match_id and its features into a dictionary
# Records will have format of dictionary(match_id, time, features...)
def features_to_dictionary(match_id, avg_lvl_1, avg_mhl_1, avg_wr_1, sum_cm_1, avg_lvl_2, avg_mhl_2, avg_wr_2, sum_cm_2, winner):
    features_record = {
        "match_id": match_id,
        "time": datetime.now(), 
        "avg_summoner_lvl_team_1": avg_lvl_1,
        "avg_match_history_length_team_1": avg_mhl_1,
        "avg_win_rate_team_1": avg_wr_1,
        "sum_champ_mastery_team_1": sum_cm_1,
        "avg_summoner_lvl_team_2": avg_lvl_2,
        "avg_match_history_length_team_2": avg_mhl_2,
        "avg_win_rate_team_2": avg_wr_2,
        "sum_champ_mastery_team_2": sum_cm_2,
        "winner": winner
    }
    return features_record

In [15]:
# Work function for grabbing (SAMPLE_SIZE_SCALE) amount for each players_info
def work_func_summonerID(players_info):
    players_summonerID = []
    for index in range(SAMPLE_SIZE_SCALE):
        player_summonerID = players_info[index]['summonerId']
        players_summonerID += [player_summonerID]
    return players_summonerID

In [16]:
# Work function for grabbing (NUM_SAMPLE_FOR_EACH_PLAYER) from each player
def work_func_sample_matches(match_history):
    matches = []
    index = 0
    while(index < len(match_history) and index < NUM_SAMPLE_FOR_EACH_PLAYER):
        matches += [match_history[index]]
        index += 1
    return matches

In [17]:
# Helper function for turning 2D lists to 1D
def twoD_to_oneD(list):
    res = []
    for i in range(len(list)):
        for j in list[i]:
            res += [j]
    return res

In [18]:
# Gets random players from a particular rank and division (one page each)
# 3 players each for now, and 3 matches for each player
def get_random_players(rate_limiter):
    # Get all players' summonerIDs
    urls = []
    for rank in SAMPLE_RANKS:
            for division in DIVISIONS:
                urls += [(f'https://na1.api.riotgames.com/lol/league/v4/entries/RANKED_SOLO_5x5/{rank}/{division}?page=1')]
    all_players_summonerID_twoD = multithread_call(urls, work_func_summonerID, rate_limiter)
    all_players_summonerID = twoD_to_oneD(all_players_summonerID_twoD)

    # Convert summonerIds into PUUIDs
    urls = []
    for player_summonerID in all_players_summonerID:
        urls += [f'https://na1.api.riotgames.com/lol/summoner/v4/summoners/{player_summonerID}']
    all_players_PUUID = multithread_call(urls, lambda info: info['puuid'], rate_limiter)
    return all_players_PUUID

In [19]:
# Gets random match ids from SAMPLE_RANKS
def get_random_match_ids(rate_limiter):
    # First get players from a particular rank and division (one page each)
    players_PUUID = get_random_players(rate_limiter)

    # Get [NUM_SAMPLE_FOR_EACH_PLAYER] matches from each player
    urls = []
    for player_PUUID in players_PUUID:
        urls += [f'https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/{player_PUUID}/ids?queue=420&type=ranked&start=0&count=20']
    matches_twoD = multithread_call(urls, work_func_sample_matches, rate_limiter)
    matches = twoD_to_oneD(matches_twoD)
    return matches

In [20]:
# Testing get_features
# Rate_limiters for limiting requests
# sleep for 2min here in future just in case, or pass rate_limiter into this function too (probs this)
'''
rate_limiter = RateLimiter(max_calls_1=MAX_REQUESTS_PER_SECOND, period_1=1, max_calls_2=MAX_REQUESTS_PER_2MINUTE, period_2=120)
match_id, avg_lvl_1, avg_mhl_1, avg_wr_1, sum_cm_1, avg_lvl_2, avg_mhl_2, avg_wr_2, sum_cm_2, winner = get_features('NA1_5024880870', rate_limiter)
record = features_to_dictionary(match_id, avg_lvl_1, avg_mhl_1, avg_wr_1, sum_cm_1, avg_lvl_2, avg_mhl_2, avg_wr_2, sum_cm_2, winner)
print(record)
'''

"\nrate_limiter = RateLimiter(max_calls_1=MAX_REQUESTS_PER_SECOND, period_1=1, max_calls_2=MAX_REQUESTS_PER_2MINUTE, period_2=120)\nmatch_id, avg_lvl_1, avg_mhl_1, avg_wr_1, sum_cm_1, avg_lvl_2, avg_mhl_2, avg_wr_2, sum_cm_2, winner = get_features('NA1_5024880870', rate_limiter)\nrecord = features_to_dictionary(match_id, avg_lvl_1, avg_mhl_1, avg_wr_1, sum_cm_1, avg_lvl_2, avg_mhl_2, avg_wr_2, sum_cm_2, winner)\nprint(record)\n"

# Placing Data Into CSV File

In [21]:
# Checks the CSV file to see if a given match_id is already there
def is_duplicate_match_id(csv_already_exists, match_id, data):
    
    # Loading current CSV file if CSV already exists
    if(csv_already_exists):
        df = pd.read_csv("game_data.csv")
        if(match_id in df['match_id'].values):
            return True

    # Check current data
    for record in data:
        if(match_id in record.get('match_id')):
            print("dupe: matchid already in data list")
            return True
    return False

In [22]:
# Check if record is already in the CSV file first
# Takes in a list of match_ids, extracts their features, and places them into a data list
def get_data_from(csv_already_exists, match_ids, rate_limiter):
    data = []
    for match_id in match_ids:
        # Search current CSV file if match_id already exists
        if not(is_duplicate_match_id(csv_already_exists, match_id, data)):
            currData = []
            match_id, avg_lvl_1, avg_mhl_1, avg_wr_1, sum_cm_1, avg_lvl_2, avg_mhl_2, avg_wr_2, sum_cm_2, winner = get_features(match_id, rate_limiter)
            # Check if not remake (3)
            if(winner != 3):
                record = features_to_dictionary(match_id, avg_lvl_1, avg_mhl_1, avg_wr_1, sum_cm_1, avg_lvl_2, avg_mhl_2, avg_wr_2, sum_cm_2, winner)
                currData.append(record)
                data.append(record)
                # Write to CSV/create a new one to save progress
                df = pd.DataFrame(currData)
                if csv_already_exists:
                    # Use this one if DataFrame/CSV already exists
                    df.to_csv("game_data.csv", index=False, mode='a', header=False)
                else:
                    # Use this one to create CSV for the first time
                    df.to_csv("game_data.csv", index=False)
                    csv_already_exists = True
            
    return data

In [24]:
# Rate_limiters for limiting requests
rate_limiter = RateLimiter(max_calls_1=MAX_REQUESTS_PER_SECOND, period_1=1, max_calls_2=MAX_REQUESTS_PER_2MINUTE, period_2=120)

# Call function to get some random match_ids
match_ids = get_random_match_ids(rate_limiter)

#match_ids = ['NA1_5024880870', 'NA1_5088501690', 'NA1_5024880870']

# Remove old CSV file if set to true
if(OVERRIDE_AND_CREATE_NEW_DATA and os.path.isfile("game_data.csv")):
    os.remove("game_data.csv")
    
# Get the data (features, time, and label) from those matches
data = []
if os.path.isfile("game_data.csv"):
    data = get_data_from(True, match_ids, rate_limiter)
else:
    data = get_data_from(False, match_ids, rate_limiter)

print(data)

Rate limit reached for 100/2m. Sleeping for 82.69 seconds.
Rate limit reached for 100/2m. Sleeping for 29.66 seconds.
Rate limit reached for 100/2m. Sleeping for 30.66 seconds.
Rate limit reached for 100/2m. Sleeping for 30.65 seconds.
Rate limit reached for 100/2m. Sleeping for 38.24 seconds.
Rate limit reached for 100/2m. Sleeping for 36.63 seconds.
Rate limit reached for 100/2m. Sleeping for 36.63 seconds.
Rate limit reached for 100/2m. Sleeping for 37.22 seconds.
Rate limit reached for 100/2m. Sleeping for 38.79 seconds.
Rate limit reached for 100/2m. Sleeping for 38.79 seconds.
Rate limit reached for 100/2m. Sleeping for 38.01 seconds.
Rate limit reached for 100/2m. Sleeping for 39.66 seconds.
Rate limit reached for 100/2m. Sleeping for 37.86 seconds.
Rate limit reached for 100/2m. Sleeping for 45.39 seconds.
Rate limit reached for 100/2m. Sleeping for 45.39 seconds.
Rate limit reached for 100/2m. Sleeping for 45.93 seconds.
Rate limit reached for 100/2m. Sleeping for 45.24 second

# Loading Data