# TILT - Teemo Induced Loss of Tranquility
A research project to study the main factors that induce tilt.

NOTE: this analysis only looks at a player's own statistics.


# Imports

In [1]:
# from collections import defaultdict
from datetime import datetime
from pathlib2 import Path
from riotwatcher import LolWatcher, ApiError
from tenacity import retry, wait_exponential, stop_after_attempt

# import arrow
# import csv
import itertools
# import datetime
# import time
import json
import numpy as np
import os
import pandas as pd

# Functions

In [2]:
def touch(path):
    """
    Create an empty file if it does not exist yet, including any missing parent directories.

    An existing file is left unchanged: it is only opened in append mode and closed again.

    Args:
        path (str): File path.
    Returns:
        None
    """
    basedir = os.path.dirname(path)
    # A bare file name has no directory part and os.makedirs("") would raise
    if basedir:
        # exist_ok avoids failing when the directory appears between a check and the creation
        os.makedirs(basedir, exist_ok=True)
    with open(path, mode="a"):
        pass
        

In [3]:
def api_key(api_key_loc):
    """
    Read the development API key from a json file and check that it is usable.

    As long as the check is answered with HTTP 403 the user is asked for a new key,
    which is written back to the json file under "dev_api_key" before trying again.

    Args:
        api_key_loc (str): Path of the json file containing the credentials.
    Returns:
        api_key (str): The API key.
    Raises:
        ApiError: If the check fails with any status code other than 403.
    """

    # Keep requesting a correct key until one works or the user cancels
    while True:
        # Close the file again before it is possibly rewritten below
        with open(api_key_loc, "r") as credentials:
            creds = json.load(credentials)
        key = creds["dev_api_key"]

        try:
            # NOTE(review): the key is checked against the Data Dragon versions
            # endpoint; confirm that this endpoint actually rejects an invalid key.
            LolWatcher(key).data_dragon.versions_all()
        except ApiError as error:
            # Only a rejected key can be fixed by asking again. Re-raise anything
            # else instead of retrying the same request forever.
            if error.response.status_code != 403:
                raise
            creds["dev_api_key"] = input("API key is incorrect, enter correct key here.")
            # Replace the old API key
            with open(api_key_loc, "w") as json_data:
                json.dump(creds, json_data)
        else:
            return key

In [4]:
def adj_patch_time(reg, patch_nr="now"):
    # TODO: add time epochs instead of patch nrs
    """
    Look up the start time of a patch and shift it by the region specific offset.

    Patch start times ("patches") and region offsets ("shifts") are read from
    data/patches.json inside the project directory.

    Args:
        reg (str): Official registered Riot server that hosts league of legends.
        patch_nr (str): League of legends update patch number.
            Defaults to "now" which is the current time.
    Returns:
        adj_time (long): Base time plus the shift of the region. For "now" the base
            is the current Unix time in seconds; for a patch it is the "start" value
            stored in the patch file.
    Raises:
        ValueError: If patch_nr is not listed in the patch file.
        KeyError: If reg has no entry in the shifts of the patch file.
    """
    from datetime import timezone

    # Read patch data
    patch_loc = proj_dir / "data" / "patches.json"
    with open(patch_loc, "r") as in_file:
        patch_data = json.load(in_file)

    # Set the base utc time
    patch_time = None
    if patch_nr == "now":
        # An aware datetime is required here: calling timestamp() on the naive
        # result of utcnow() interprets it as local time and skews the epoch.
        patch_time = int(datetime.now(timezone.utc).timestamp())
    else:
        for patch in patch_data["patches"]:
            if patch["name"] == patch_nr:
                patch_time = patch["start"]

    # Check if patch number is valid (a start time of 0 is still a valid value)
    if patch_time is None:
        raise ValueError(f"Patch number: {patch_nr} is unknown")

    # Set the time shift
    try:
        reg_shifts = patch_data["shifts"][reg.upper()]
    except KeyError:
        # Check if region provided is valid
        raise KeyError(f"Region: {reg} is unknown") from None

    # Calculate time adjusted for the time shift of the region
    return patch_time + reg_shifts

In [5]:
def trans_reg(reg_abbrv):
    """
    Translate a league of legends region into a region readable by riot watcher.

    The lookup is case insensitive.

    Args:
        reg_abbrv (str): Abbreviation of a official registered Riot servers that
            hosts league of legends (e.g. euw1).
    Returns:
        rw_region (str): Riot Watcher region the reg_abbrv server falls under.
    Raises:
        KeyError: If reg_abbrv is not one of the known region abbreviations.
    """
    # Riot watcher region per region abbreviation
    regions_metadata = {"br1": "americas",
                        "eun1": "europe",
                        "euw1": "europe",
                        "jp1": "asia",
                        "kr": "asia",
                        "la1": "americas",
                        "la2": "americas",
                        "na1": "americas",
                        "oc1": "americas",
                        "tr1": "europe",
                        "ru": "europe"
                        }

    try:
        return regions_metadata[reg_abbrv.lower()]
    except KeyError:
        # Fail with a clear message instead of an unbound local variable
        raise KeyError(f"Region: {reg_abbrv} is unknown") from None

In [6]:
@retry(wait=wait_exponential(multiplier=1, min=5, max=60), stop=stop_after_attempt(10))
def get_matches(reg, pid, s_time, e_time):
    """
    Retrieve the ranked solo queue (queue 420) match IDs of a summoner within a timeframe.

    The match list is requested in pages of 100 IDs using the start offset of the
    endpoint, until a page comes back that is not completely filled. The whole
    function is retried with exponential back-off when a request raises.

    Args:
        reg (str): Abbreviation of a official registered Riot servers that
            hosts league of legends (e.g. euw1).
        pid (str): An Encrypted globally unique identifyer for a summoner.
        s_time (long): Start of the timeframe as epoch timestamp, passed to the
            API unchanged.
        e_time (long): End of the timeframe as epoch timestamp, passed to the
            API unchanged.
    Returns:
        matches (list): League of legends match IDs in the order returned by the API
            (documented by the original author as most recent match first).
    """
    page_size = 100
    rw_reg = trans_reg(reg)

    matches = []
    while True:
        # Offset paging avoids fetching match details just to find the next
        # end time, and cannot return the boundary match a second time.
        page = lol_watcher.match.matchlist_by_puuid(region=rw_reg,
                                                    puuid=pid,
                                                    start=len(matches),
                                                    count=page_size,
                                                    queue=420,
                                                    start_time=s_time,
                                                    end_time=e_time)
        matches.extend(page)
        # A page that is not full means there is nothing left to collect
        if len(page) < page_size:
            break

    return matches

In [7]:
@retry(wait=wait_exponential(multiplier=1, min=5, max=60), stop=stop_after_attempt(10))
def geather_data(puuid, matches):
    """
    Gather the statistics of one summoner plus the timing data for a list of matches.

    Every match is requested from the API; the region is taken from the prefix of
    the match ID (the part before the first underscore). A match in which the
    summoner is not listed as participant is skipped.

    Args:
        puuid (str): An Encrypted globally unique identifyer for a summoner.
        matches (list): League of legends match IDs in chronological order
                        starting with the most recent match.
    Returns:
        df_matches_data (df): One row per match, in the order of matches, with the
            columns listed in col_names.
    """

    # Match level fields -> columns "game_make" and "game_start"
    game_info = ["gameCreation", "gameStartTimestamp"]
    # Participant level fields, in the same order as their names in col_names
    keep_data = ["puuid", "teamPosition", "kills", "assists", "deaths",
                 "doubleKills", "tripleKills", "quadraKills", "pentaKills",
                 "killingSprees", "largestKillingSpree",
                 "gameEndedInEarlySurrender", "teamEarlySurrendered", "gameEndedInSurrender",
                 "neutralMinionsKilled", "totalMinionsKilled", "teamId", "win", "timePlayed"]

    col_names = ["match_id", "puuid", "pos", "kills", "assists", "deaths",
                 "2_kills", "3_kills", "4_kills", "5_kills",
                 "kill_spree", "max_kill_spree",
                 "remake", "pre_15_surr", "game_surr",
                 "neutral_kills", "minion_kills", "team_id", "win",
                 "time_played", "game_make", "game_start"]

    comp_data = []
    for match_id in matches:
        reg = match_id.split("_")[0].lower()
        match_info = lol_watcher.match.by_id(trans_reg(reg), match_id)["info"]

        # Find the entry of the requested summoner. Without this guard a missing
        # entry would silently reuse the statistics of the previous match.
        part_info = next((part for part in match_info["participants"]
                          if part["puuid"] == puuid), None)
        if part_info is None:
            continue

        # Row layout: match id, summoner statistics, time data
        row = [match_id]
        row.extend(part_info[key] for key in keep_data)
        row.extend(match_info[key] for key in game_info)
        comp_data.append(row)

    # Store all data in a dataframe
    return pd.DataFrame(comp_data, columns=col_names)
    

In [18]:
def filt_matches(matches_data, max_rest, min_streak):
    """
    Filter matches based on rest time in between matches and number of matches played in a row.

    The rows are expected in the order most recent match first. The input frame is
    not modified; all helper columns are added to a copy.

    Args:
        matches_data (df): Data geathered from matches of a summoner. Needs the
            columns "game_make" and "game_start" (milliseconds) and "time_played"
            (seconds).
        max_rest (int): Rest time in between matches in miliseconds e.g. 3600000 = 1 hour.
        min_streak (int): Minimum games played in a row with less then max_rest between.
    Returns:
        df_match_filt (df): Matches filtered based on minimum subsequent matches within
            the maximum rest time in between the matches. Adds the columns "game_end",
            "prev_game_end", "next_game_make", "time_since_last", "time_till_next",
            "idx" (row position in the input) and "streak_id".
    """

    # Work on a copy so the frame of the caller does not grow helper columns
    df_match_filt = matches_data.copy()

    # time_played is in seconds while the timestamps are in milliseconds
    # (max_rest is compared against their difference in milliseconds)
    df_match_filt["game_end"] = df_match_filt["game_start"] + df_match_filt["time_played"] * 1000
    # Rows are newest first: the following row is the previous game in time
    df_match_filt["prev_game_end"] = df_match_filt["game_end"].shift(-1, fill_value=np.nan)
    df_match_filt["next_game_make"] = df_match_filt["game_make"].shift(1, fill_value=np.nan)
    df_match_filt["time_since_last"] = df_match_filt["game_make"] - df_match_filt["prev_game_end"]
    df_match_filt["time_till_next"] = df_match_filt["next_game_make"] - df_match_filt["game_end"]

    # Find games that were played in a streak by grouping based on matches played subsequently
    # Copy index to know where a empty line was inserted
    df_match_filt["idx"] = df_match_filt.index
    # Rows in front of which a streak has to be split.
    # NOTE(review): the isnan term is true for the last two rows, so the two oldest
    # matches always end up in a group of their own - confirm this is intended.
    long_rest = df_match_filt["time_till_next"] >= max_rest
    no_older_gap = np.isnan(df_match_filt["time_since_last"].shift(-1))
    indices = np.where(long_rest | no_older_gap)[0]
    # Add an all-NaN line in front of every split position
    columns = df_match_filt.columns
    empty_row = [np.nan] * len(columns)
    df_match_filt = pd.DataFrame(np.insert(df_match_filt.values, list(indices),
                                           values=empty_row,
                                           axis=0), columns=columns)

    # A new group starts wherever idx does not follow its predecessor by exactly 1,
    # which is the case at every inserted line and at the row right after it
    game_streak = df_match_filt["idx"].diff().ne(1).cumsum()
    df_match_filt["streak_id"] = df_match_filt.groupby(game_streak).ngroup()
    # Filter out streaks that are lower then min_streak (count ignores the NaN lines)
    streak_len = df_match_filt["idx"].groupby(game_streak).transform("count")
    df_match_filt = df_match_filt[streak_len >= min_streak]

    # Clean up: drop all inserted empty rows
    df_match_filt = df_match_filt[df_match_filt["idx"].notna()]

    return df_match_filt


In [31]:
def get_summoner_data(regs, tiers, divs, sum_lim, p_patch, r_patch, max_rest, min_streak, sumo_data_loc):
    """
    Collect filtered match data of summoners per region, tier and division and append it to a tsv file.

    For every combination of region, tier and division the ranked solo queue entries
    are read page by page. A summoner only counts towards sum_lim when matches are
    left after filtering. Collection for a combination stops when sum_lim is reached
    or when the API returns an empty page.

    Args:
        regs (list): Official registered Riot servers that hosts league of legends.
        tiers (list): Tiers below Master rank.
        divs (list): Divisions in roman numerals.
        sum_lim (int): Maximum number of summoner ids to collect per region, tier and division.
        p_patch (str): A patch prior to then r_patch's patch.
        r_patch (str): A more recent patch then p_patch's patch.
        max_rest (int): Rest time in between matches in miliseconds e.g. 3600000 = 1 hour.
        min_streak (int): Minimum games played in a row with less then max_rest between.
        sumo_data_loc (str): location to store the info on the summoner and game statistics.
    Returns:
        None
    """

    for reg, tier, div in itertools.product(regs, tiers, divs):
        # The time window only depends on the region, so look it up once
        # instead of re-reading the patch file for every summoner
        s_time = adj_patch_time(reg, p_patch)
        e_time = adj_patch_time(reg, r_patch)
        cur_rank = f"{tier}_{div}"

        summs_div = 0
        page_nr = 0
        # Keep adding new summoners until the summoner limit has been reached
        while summs_div < sum_lim:
            page_nr += 1
            summs = lol_watcher.league.entries(reg, "RANKED_SOLO_5x5", tier, div, page_nr)
            # An empty page means the division holds no more summoners; without
            # this stop the loop would request new pages forever
            if not summs:
                break

            # Look into data per summoner
            for sumo in summs:
                if summs_div >= sum_lim:
                    break

                # Get PUUID
                pid = lol_watcher.summoner.by_id(reg, sumo["summonerId"])["puuid"]

                # Retrieve all match ID's between two time points
                match_ids = get_matches(reg, pid, s_time, e_time)
                # Skip summoners that have no matches in the given time frame
                if not match_ids:
                    continue

                # Geather match data of the summoner
                match_info = geather_data(pid, match_ids)

                # Filter matches
                filt_match_info = filt_matches(match_info, max_rest, min_streak)
                # Skip summoners that have no matches left after filtering
                if filt_match_info.empty:
                    continue

                # Add summoner rank information
                filt_match_info.insert(2, "rank", cur_rank)

                # Increase the summoner counter
                summs_div += 1

                # Store summoner data in a tsv file; the header is only written
                # when the file is created. os.path.exists accepts both str and
                # path objects.
                file_true = os.path.exists(sumo_data_loc)
                filt_match_info.to_csv(sumo_data_loc,
                                       header=not file_true,
                                       mode="a" if file_true else "w",
                                       sep="\t",
                                       index=False)

In [34]:
def prep_data(df_match_filt, out_file):
    """
    Preprocess the filtered matches and write the result to a tsv file.

    The rows are reversed (oldest match on top), the streaks are renumbered, the
    columns "consec_wl" and "win_next" are added and the helper columns used for
    filtering are dropped. The input frame is not modified.

    Args:
        df_match_filt (df): Matches filtered based on minimum subsequent matches within
            the maximum rest time in between the matches.
        out_file (str): Location of the tsv file the preprocessed data is written to.
    Returns:
        df_match_prep (df): The preprocessed data that was written to out_file.
    """

    # Reverse the rows so games played first (oldest) are on top. Copy, so the
    # assignments below do not act on a slice of the input frame.
    df_match_prep = df_match_filt[::-1].copy()

    # Redo the streak_id numbers
    streak_id_ident = df_match_prep["streak_id"].diff().ne(0).cumsum()
    # Taken from the input order on purpose: there consecutive games differ by
    # exactly +1 in idx. groupby aligns this key with the reversed rows by index label.
    idx_consec = df_match_filt["idx"].diff().ne(1).cumsum()
    df_match_prep["streak_id"] = df_match_prep.groupby([streak_id_ident, idx_consec]).ngroup()

    # Add informational columns
    game_streak = df_match_prep["streak_id"].diff().ne(0).cumsum()
    # A new run starts when the result differs from the previous game of the same
    # streak (the first game of a streak has no previous game and always starts one)
    prev_win = df_match_prep.groupby(game_streak)["win"].shift()
    result_run = (df_match_prep["win"] != prev_win).cumsum()
    # Number of preceding games with the same result within the current run
    df_match_prep["consec_wl"] = df_match_prep["win"].groupby(result_run).cumcount()
    # Turn streak values negative if the game ended in a loss; only this one
    # column is addressed so columns added later can never be flipped by accident
    df_match_prep.loc[df_match_prep["win"] == False, "consec_wl"] *= -1
    # Add if the next game of the same streak is a win or lose (NaN for the last game)
    df_match_prep["win_next"] = df_match_prep.groupby(game_streak)["win"].shift(-1, fill_value=np.nan)

    # Clean up the dataframe
    # Dropping columns used for calculations that are not needed anymore
    col_drop = ["game_make", "game_start", "game_end", "prev_game_end", "next_game_make",
                "time_since_last", "time_till_next", "idx"]
    df_match_prep = df_match_prep.drop(columns=col_drop)

    df_match_prep.to_csv(out_file,
                         mode="w",
                         sep="\t",
                         index=False)
    return df_match_prep

    
# TODO: if this gets too slow it might be smarter to store the data as a pickle object instead of a csv file
# NOTE(review): data_dir and out_dir are assigned in the "Global variables and settings"
# cell further down, so that cell has to be executed before this one.
# Read in the raw summoner data (tab separated)
sumo_info_raw_loc = data_dir / "summoner_data_raw.tsv"
sumo_info_raw = pd.read_csv(sumo_info_raw_loc, sep = "\t")

# Preprocessing; the result is also written to prep_sumo_data.tsv in the out directory
sumo_data_loc = out_dir / "prep_sumo_data.tsv"
sumo_info = prep_data(sumo_info_raw, sumo_data_loc)

# sumo_info

# Show the first column together with the last nine columns
sumo_info.iloc[:, [0]+list(range(-9,0))]

Unnamed: 0.1,Unnamed: 0,game_surr,neutral_kills,minion_kills,team_id,win,time_played,streak_id,consec_wl,win_next
26,20,True,52,249,100,False,1637,0,0,False
25,19,False,160,154,200,False,2847,0,-1,False
24,18,False,0,0,100,False,197,0,-2,False
23,17,False,0,0,200,False,191,0,-3,True
22,16,False,83,189,200,True,1866,0,0,
21,11,True,101,44,200,False,1717,1,0,True
20,10,False,7,4,100,True,197,1,0,False
19,9,True,76,73,200,False,1219,1,0,False
18,8,True,113,45,100,False,2153,1,-1,
17,2,False,0,13,200,False,196,2,0,True


# Global variables and settings

In [12]:
# Directory locations

# Project folder: the parent of the current working directory
proj_dir = Path.cwd().parent

# Raw data storage
data_dir = proj_dir / "data"

# Output directory
out_dir = proj_dir / "out"

# Global variables

# Location of the json file holding the development API key
api_key_loc = data_dir / "dev_api_key.json"

# Riot watcher client used by all API calls; api_key() checks the key first
# and asks for a new one when it is rejected
lol_watcher = LolWatcher(api_key(api_key_loc))

# Read user settings
settings_loc = proj_dir / "settings" / "config.json"
with open(settings_loc, "r") as settings_data:
    settings = json.load(settings_data)

### Data collection

In [37]:
# Retrieve the summoner info of n summoners

# Default output file for the raw summoner data
raw_sumo_data = data_dir / "summoner_data_raw.tsv"

# Ask what to do for as long as the chosen output file already exists
while Path(raw_sumo_data).exists():
    answer = input("""Enter "yes" to overwrite file, or enter new filepath""")
    if answer.upper() in ["Y", "YES"]:
        os.remove(raw_sumo_data)
    else:
        # Keep the location a Path object like the default one, so the code using
        # it can treat both the same. The file itself is not created here:
        # creating it would make the loop ask again straight away.
        raw_sumo_data = Path(answer)

# Make sure the directory of the output file exists before data is written to it
os.makedirs(os.path.dirname(os.path.abspath(raw_sumo_data)), exist_ok=True)

get_summoner_data(regs=settings["regions"],
                  tiers=settings["tiers"],
                  divs=settings['divisions'],
                  sum_lim=settings['summoner_limit'],
                  p_patch=settings["prior_patch"],
                  r_patch=settings["recent_patch"],
                  max_rest=settings["max_rest"],
                  min_streak=settings["min_streak"],
                  sumo_data_loc=raw_sumo_data)


### Preprocessing

In [38]:
# TODO: if this gets too slow it might be smarter to store the data as a pickle object instead of a csv file
# TODO: or just add this to the data collection step, but that feels a bit wrong
# Read in the raw summoner data (tab separated)
sumo_info_raw_loc = data_dir / "summoner_data_raw.tsv"
sumo_info_raw = pd.read_csv(sumo_info_raw_loc, sep = "\t")

# Preprocessing; the result is also written to prep_sumo_data.tsv in the out directory
sumo_data_loc = out_dir / "prep_sumo_data.tsv"
sumo_info = prep_data(sumo_info_raw, sumo_data_loc)

# Show the first rows as a quick sanity check
sumo_info.head()

Unnamed: 0,match_id,puuid,rank,pos,kills,assists,deaths,2_kills,3_kills,4_kills,...,pre_15_surr,game_surr,neutral_kills,minion_kills,team_id,win,time_played,streak_id,consec_wl,win_next
26,EUW1_5452249285,DPBzxfs5KusnEjaaVbsxgLvSxwoCgPlI7vNQximyI_x-e6...,SILVER_II,BOTTOM,16,6,5,1,0,0,...,False,True,52,249,100,False,1637,0,0,False
25,EUW1_5452286100,DPBzxfs5KusnEjaaVbsxgLvSxwoCgPlI7vNQximyI_x-e6...,SILVER_II,MIDDLE,15,1,12,3,0,0,...,False,False,160,154,200,False,2847,0,-1,False
24,EUW1_5452450937,DPBzxfs5KusnEjaaVbsxgLvSxwoCgPlI7vNQximyI_x-e6...,SILVER_II,,0,0,0,0,0,0,...,True,False,0,0,100,False,197,0,-2,False
23,EUW1_5452384689,DPBzxfs5KusnEjaaVbsxgLvSxwoCgPlI7vNQximyI_x-e6...,SILVER_II,,0,0,0,0,0,0,...,True,False,0,0,200,False,191,0,-3,True
22,EUW1_5452368148,DPBzxfs5KusnEjaaVbsxgLvSxwoCgPlI7vNQximyI_x-e6...,SILVER_II,BOTTOM,28,7,9,7,2,0,...,False,False,83,189,200,True,1866,0,0,


### Data Analysis

In [36]:


# Baselines

# Win rate per rank (mean of the win column per rank)
print(sumo_info.groupby("rank")["win"].mean())

# Win rate of the next game per value of consec_wl (the win/loss run counter)
sumo_info.groupby("consec_wl")["win_next"].mean()



# sumo_info[["win", "consecutive WL"]]
# Baseline = all games of a player
# Tilt is calculated as the increase or decrease in win % per streak length
# % win on first game
# % win on second game
# % win on third game etc.

# Make 2 datasets: games after a long rest and games after a short rest
# long rest >
# short rest <=
#
#





rank
SILVER_I     0.466667
SILVER_II    0.333333
Name: win, dtype: float64


consec_wl
-3    1.000000
-2    0.000000
-1    0.666667
 0    0.428571
 1    0.000000
Name: win_next, dtype: float64

#### Importance of rest

In [None]:
# Rest time vs. win rate plot
#
# Check if a player has games with a short rest; take all games, the filter is applied later in the data analysis
#
#

#### Player profiles

In [None]:
# When do players play?
# Player vs. time point


#### Tilt or in the zone?

In [None]:
# Win rate increase or decrease as a factor of the previous game
# Base win rate?
# Define good and bad stats using medians/averages (normal distribution?)
# Win rate in the next game after losing with bad stats
# Win rate in the next game after losing with good stats
# Win rate in the next game after winning with bad stats
# Win rate in the next game after winning with good stats
