In [45]:
import pandas as pd

#Read both exported CSV tables: one row per game, and one row per player per game
game_metadata, game_player_stats = [
    pd.read_csv(csv_path)
    for csv_path in ('./out/game_metadata.csv', './out/game_player_stats.csv')
]

First, we want an Elo rating for each team before each major tournament. To get that, we first sort all matches chronologically.

In [46]:
#Parse the raw 'date' column into real datetimes so the matches can be ordered in time
parsed_dates = pd.to_datetime(game_metadata['date'])
game_metadata['date'] = parsed_dates
print(game_metadata.dtypes)

gameid                object
date          datetime64[ns]
league                object
playoffs               int64
patch                float64
gamelength             int64
dtype: object


In [47]:
#Actually sort the matches by date
#Use a stable sort: matches that share the same date keep their existing row order.
#Elo updates are order-dependent, so ties must not be shuffled by the default (unstable) quicksort
game_metadata = game_metadata.sort_values(by='date', kind='mergesort').reset_index(drop=True)

Note (TODO): import the team_name_map and player_name_map. For now we can refer to teams by their id alone, but the maps will be useful when modelling/graphing to match the ids to the actual teams.

Now we need to calculate the Elo of each team and save a snapshot of the ratings before each tournament and each patch. The idea is as follows:
1. Initialize the Elo of each team with 1500
2. Create a list of major tournaments (we will be looking at Worlds, MSI and First Stand for now)
3. Iterate over the matches:
    4. Check whether the match is part of a major tournament
    5. If yes:
        6. Take a 'snapshot' of all Elo ratings before said match
        7. Save it in a map where the key is the date of the first match of that tournament, i.e. the date of the current match
    8. Update the Elo ratings of the two teams using the result of the match
9. END

Afterwards we have a map that holds the Elo ratings of every team before each tournament.

In [48]:
#Collect every distinct team id that appears in the player stats
all_teams = game_player_stats['teamid'].unique()
print(len(all_teams))

#Every team starts with the same baseline rating of 1500
elo_ratings = dict.fromkeys(all_teams, 1500)

1849


In [49]:
#Now we need a function that calculates the new Elo of both teams after a match
#For now we use the original Elo algorithm (TODO: cite the original paper)
#Other candidates would be Glicko-2 or TrueSkill

#This function returns the updated ratings for both players, in our case teams
#result_A is modelled from the perspective of A: 0 means A lost and B won, 1 means A won and B lost
#Uses the original formulas for the expected outcome and for the rating update
def update_elo(rating_A, rating_B, result_A, k=32):
    """Return the post-match Elo ratings of both sides as ``(new_A, new_B)``.

    Parameters
    ----------
    rating_A, rating_B : pre-match ratings of side A and side B.
    result_A : outcome from A's point of view (1 = A won, 0 = A lost).
    k : step size that scales how far a single match moves the ratings.

    Returns
    -------
    tuple of the updated ratings ``(new_rating_A, new_rating_B)``.
    """
    #Expected score of A from the logistic curve on the rating gap; B gets the remainder
    rating_gap = (rating_B - rating_A) / 400
    win_prob_A = 1 / (1 + 10 ** rating_gap)
    win_prob_B = 1 - win_prob_A

    #B's actual score is the complement of A's
    result_B = 1 - result_A

    #Move each rating by k times (actual score - expected score)
    return (
        rating_A + k * (result_A - win_prob_A),
        rating_B + k * (result_B - win_prob_B),
    )

In [50]:
#We do not need a list of start dates for each major tournament: for each game we can look at
#the league it was played in, and if that is one of 'MSI', 'WLDs' or 'FST' we know we have arrived at a major tournament
#For now this includes a lot of unnecessary teams, since we only look at tier 1 events but have tier 2 leagues in the data; that does not matter here

tournament_list = ['MSI', 'WLDs', 'FST']

gameid                object
date          datetime64[ns]
league                object
playoffs               int64
patch                float64
gamelength             int64
dtype: object


In [53]:
#Create a map that saves the elo snapshots
#Key: date of a major-tournament match. Value: copy of all ratings taken before the
#first major-tournament match on that date was applied
elo_snapshot = {}

#Start from fresh ratings so that re-running this cell does not apply every match a second time
elo_ratings = {teamid: 1500 for teamid in all_teams}

#Group the player rows by game once instead of scanning the whole table for every match
#(get_group raises a KeyError if a game from game_metadata has no player rows at all)
player_rows_by_game = game_player_stats.groupby('gameid', sort=False)

#Now we iterate over the matches in the (chronological) order of game_metadata
for game_id, current_league, match_date in zip(
    game_metadata['gameid'], game_metadata['league'], game_metadata['date']
):
    #Take the snapshot before the match is rated. Keep only the earliest snapshot per date,
    #otherwise later matches on the same date overwrite it with ratings that already
    #contain results of that tournament
    if current_league in tournament_list and match_date not in elo_snapshot:
        elo_snapshot[match_date] = elo_ratings.copy()

    #Now update the elo of each team
    #First get both teams
    game_entries = player_rows_by_game.get_group(game_id)
    assert len(game_entries) == 10, \
        f"game {game_id}: expected 10 player rows, found {len(game_entries)}"
    teams_in_game = game_entries['teamid'].unique()
    assert len(teams_in_game) == 2, \
        f"game {game_id}: expected 2 teams, found {len(teams_in_game)}"
    team_A, team_B = teams_in_game

    #Now fetch the result from the perspective of team A (first row of that team)
    result_A = game_entries.loc[game_entries['teamid'] == team_A, 'result'].iloc[0]

    #Finally actually update the ratings
    elo_ratings[team_A], elo_ratings[team_B] = update_elo(
        elo_ratings[team_A], elo_ratings[team_B], result_A
    )


In [55]:
#Write the final ratings and the tournament snapshots to disk as pickle files
import pickle

for pickle_path, payload in (
    ('./out/elo_ratings.pkl', elo_ratings),
    ('./out/elo_snapshot.pkl', elo_snapshot),
):
    with open(pickle_path, 'wb') as handle:
        pickle.dump(payload, handle)