# Elo Ratings
#### The Elo rating system is a method for calculating the relative skill levels of teams in zero-sum games such as basketball.

In [2]:
import pandas as pd 
from plotnine import *
# Show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# Load the cleaned game data and give the CSV's unnamed first column
# ('Unnamed: 0') the name GameDateKey
games = pd.read_csv(
    r'~/jupyter/capstone_Group10/data/gamedatacleaned-test.csv'
).rename(columns={'Unnamed: 0': 'GameDateKey'})



We calculate the Elo metric using the teams in the cleaned game dataset. First we need to add columns for the winning team, winning score, losing team, and losing score to the DataFrame.

The Elo metric rewards the winner by adding points and penalizes the loser by subtracting the same number of points, so it remains a zero-sum system.

In [None]:
# Re-express every game from the winner's / loser's point of view.
# A row counts as a home win only when 'winner' is exactly 'Home'; any other
# value is treated as an away win.
home_won = games['winner'] == 'Home'

# Series.where keeps the first series where the mask is True and takes the
# value from the second series otherwise, so each line below picks the home
# side's value for home wins and the away side's value for away wins.
games['Wteam'] = games['hometeam'].where(home_won, games['awayteam'])
games['Lteam'] = games['awayteam'].where(home_won, games['hometeam'])
games['WScore'] = games['home_points'].where(home_won, games['away_points'])
games['LScore'] = games['away_points'].where(home_won, games['home_points'])
# Location of the winner is simply the original 'winner' value for the row
games['wloc'] = games['winner']

Next we select specific columns from the DataFrame and create a new column for the margin of victory, which is used when calculating the Elo metric.

In [None]:
# Pull the columns needed for the Elo calculation into their own DataFrame.
# .copy() makes `a` independent of `games`, so adding columns below neither
# modifies `games` nor triggers pandas' SettingWithCopyWarning.
a = games[["WScore", "LScore", "Wteam", "Lteam", "wloc", "date"]].copy()
# Margin of victory (winner's score minus loser's score); the Elo update is
# scaled by this value
a['margin'] = a['WScore'] - a['LScore']
# Season is taken as the calendar year of the game date
a["Season"] = pd.to_datetime(a["date"]).dt.year

Next we create a dictionary of team names from the cleaned game data. The dictionary stores each team's name and Elo rating, which is updated as we iterate through the games.
Each team starts at 1500, which represents an average team. Since we are using end-of-year stats for the model, we figured that even though every team starts at the same value, by the end of the season the ratings would diverge towards
their expected values. For the following years the metric would then be even more representative of team performance.

In [None]:
# Every team that appears as either a winner or a loser
team_ids = set(a.Wteam) | set(a.Lteam)

# Rating table keyed by team name; every team starts at 1500
elo_dict = dict.fromkeys(team_ids, 1500)

We created three functions to calculate the values. The first two compute the Elo win prediction and the expected margin of victory based on the Elo difference; they are utilized in the third function, which calculates the Elo update. These functions are then applied to the cleaned game data to calculate the metric.

In [None]:
def elo_pred(elo1, elo2):
    """Return the predicted score of the team rated `elo1` against `elo2`.

    Computes 1 / (10 ** (-(elo1 - elo2) / 400) + 1), so equal ratings give
    0.5 and a higher `elo1` pushes the result towards 1.
    """
    rating_gap = elo1 - elo2
    odds_against = 10. ** (-rating_gap / 400.)
    return 1. / (odds_against + 1.)

def expected_margin(elo_diff):
    """Return the expected margin of victory for a given rating difference.

    Linear in `elo_diff`: 7.5 at a difference of zero, plus 0.006 per
    rating point.
    """
    baseline = 7.5
    per_rating_point = 0.006
    return baseline + per_rating_point * elo_diff

def elo_update(w_elo, l_elo, margin, k=None):
    """Return the winner's pre-game win prediction and the rating change.

    Parameters:
        w_elo: rating of the winning team (including any adjustment the
            caller has already applied).
        l_elo: rating of the losing team (same convention).
        margin: winner's score minus loser's score. The multiplier raises
            ``margin + 3`` to a fractional power, so this is expected to be
            greater than -3.
        k: scaling factor for the update. When omitted, the module-level
            ``K`` is used, which must be defined before the first call.

    Returns:
        (pred, update): ``pred`` is ``elo_pred(w_elo, l_elo)``; ``update`` is
        the number of rating points to add to the winner and subtract from
        the loser.
    """
    if k is None:
        # Looked up at call time so the notebook may define K after this cell
        k = K
    elo_diff = w_elo - l_elo
    pred = elo_pred(w_elo, l_elo)
    # Ratio of the (dampened) actual margin to the margin expected from the
    # rating difference
    mult = ((margin + 3.) ** 0.8) / expected_margin(elo_diff)
    # The less expected the win (small pred), the larger the update
    update = k * mult * (1 - pred)
    return pred, update

Then the functions are applied to game data cleaned 

In [None]:
# Controls how quickly Elo reacts to new information
K = 15.
# Rating points credited to whichever side played at home
HOME_ADVANTAGE = 100.

# Per-game results, collected in the row order of `a`
preds = []
w_elo = []
l_elo = []

for index, row in a.iterrows():
    w, l = row.Wteam, row.Lteam
    margin = row.margin
    wloc = row.wloc

    # The home side gets the bonus: the winner when wloc is "Home", the
    # loser when wloc is "Away"; any other value gives no bonus to either
    w_ad = HOME_ADVANTAGE if wloc == "Home" else 0.
    l_ad = HOME_ADVANTAGE if wloc == "Away" else 0.

    # Prediction and rating change for this game, from the pre-game ratings
    pred, update = elo_update(elo_dict[w] + w_ad, elo_dict[l] + l_ad, margin)

    # Equal and opposite transfer of points: winner gains what the loser loses
    elo_dict[w] = elo_dict[w] + update
    elo_dict[l] = elo_dict[l] - update

    # Record the prediction and both teams' ratings after this game
    preds.append(pred)
    w_elo.append(elo_dict[w])
    l_elo.append(elo_dict[l])

# Post-game ratings, stored alongside each game
a['w_elo'] = w_elo
a['l_elo'] = l_elo

Since our focus is on season-level values, we calculate the end-of-season Elo rating for each team in each season.

In [None]:
#calculate season elos - for validation check of scores
def final_elo_per_season(df, team_id):
    """Return `team_id`'s rating after its last game of each season.

    Parameters:
        df: game table with columns Wteam, Lteam, Season, date, w_elo and
            l_elo. It is not modified.
        team_id: team to look up; matched against both Wteam and Lteam.

    Returns:
        DataFrame with columns team_name, season and season_elo, one row per
        season in which the team appears (empty if it never appears). The
        index holds the labels of the rows of `df` that were selected.
    """
    played = (df.Wteam == team_id) | (df.Lteam == team_id)
    # Copy after filtering so the helper column below never touches `df`
    d = df.loc[played, :].copy()
    # Order on the parsed timestamp rather than the raw `date` value, so that
    # text dates such as '2/1/2019' and '11/1/2019' sort chronologically
    d['_played_on'] = pd.to_datetime(d['date'])
    d = d.sort_values(['Season', '_played_on'])
    # Keep only the team's final game of each season
    d = d.drop_duplicates(['Season'], keep='last')
    # Take the loser-side rating where the team lost, otherwise the
    # winner-side rating
    season_elo = d['l_elo'].where(d.Lteam == team_id, d['w_elo'])
    out = pd.DataFrame({
        'team_name': team_id,
        'season': d.Season,
        'season_elo': season_elo,
    })
    return out
# One end-of-season table per team, stacked into a single long table
df_list = [final_elo_per_season(a, team) for team in team_ids]
season_elos = pd.concat(df_list)

Here we get the data ready for the model. We drop the helper columns, look up each team's season Elo rating by its team-and-season key to merge it into the cleaned game data, and then export the result as a CSV for the next step in the pipeline.

In [None]:
# Remove the helper columns that were only needed to compute Elo
games.drop(columns=['WScore', 'LScore', 'wloc', 'Wteam', 'Lteam'], inplace=True)

# Build "<team>_<season>" keys so ratings can be matched to a specific season
names = season_elos['team_name'].astype(str) + '_' + season_elos['season'].astype(str)
df = pd.DataFrame()
df['team_name'] = names
df['season_elo'] = season_elos['season_elo']

# Key -> season Elo lookup, built once and used for both home and away sides
season_elo_by_key = df.set_index("team_name")["season_elo"].to_dict()
games['homeSeasonElo'] = games.home_teamname_season.map(season_elo_by_key)
games['awaySeasonElo'] = games.away_teamname_season.map(season_elo_by_key)

# Export for the next step in the pipeline
games.to_csv(r'~/jupyter/capstone_Group10/data/gamedatacleanedelo.csv', index = False)