# Elo Calculations Notebook

In this notebook, we will devise a way to quickly compute the most up-to-date elo rating for a given team.

Additionally, we will need to find a way to find to find the nearest future games so we may predict the winner. 

In [102]:
# Import statements
import pandas as pd
from datetime import datetime
from nba_api.stats.static import teams

from nba_api.stats.endpoints import teamgamelogs
from nba_api.stats.endpoints import teaminfocommon

In [103]:
teamGameLogs = teamgamelogs.TeamGameLogs(season_nullable='2023-24')
df = teamGameLogs.get_data_frames()[0]

df

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,AVAILABLE_FLAG
0,2023-24,1610612756,PHX,Phoenix Suns,0022301138,2024-04-07T00:00:00,PHX vs. NOP,L,48.0,39,...,511,1644,1095,256,1686,681,1447,1755,1598,1
1,2023-24,1610612753,ORL,Orlando Magic,0022301136,2024-04-07T00:00:00,ORL vs. CHI,W,48.0,42,...,1515,324,99,1305,1044,681,1447,1188,345,1
2,2023-24,1610612744,GSW,Golden State Warriors,0022301142,2024-04-07T00:00:00,GSW vs. UTA,W,48.0,49,...,313,1644,520,256,1388,1131,2332,860,680,1
3,2023-24,1610612740,NOP,New Orleans Pelicans,0022301138,2024-04-07T00:00:00,NOP @ PHX,W,48.0,40,...,1185,1180,791,431,1917,681,1447,1188,680,1
4,2023-24,1610612757,POR,Portland Trail Blazers,0022301134,2024-04-07T00:00:00,POR @ BOS,L,48.0,40,...,1185,1438,791,1989,1917,68,2220,1623,2045,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2341,2023-24,1610612748,MIA,Miami Heat,0022300068,2023-10-25T00:00:00,MIA vs. DET,W,48.0,37,...,1846,49,190,1676,2335,901,299,1886,1133,1
2342,2023-24,1610612743,DEN,Denver Nuggets,0022300061,2023-10-24T00:00:00,DEN vs. LAL,W,48.0,48,...,649,732,520,662,672,347,1217,797,458,1
2343,2023-24,1610612747,LAL,Los Angeles Lakers,0022300061,2023-10-24T00:00:00,LAL @ DEN,L,48.0,41,...,1700,732,1773,1305,1388,901,1845,1623,1832,1
2344,2023-24,1610612744,GSW,Golden State Warriors,0022300062,2023-10-24T00:00:00,GSW vs. PHX,L,48.0,36,...,2190,505,190,662,1686,1923,425,1817,1336,1


In [104]:
# Set initial Elo ratings manually

initial_elo = dict()  # Create a dictionary to hold initial elos

# ELO RATINGS AT START OF 2023-24 SEASON, from https://neilpaine.substack.com/p/2023-24-nba-elo-ratings
initial_elo["ATL"] = 1500
initial_elo["BKN"] = 1431
initial_elo["BOS"] = 1771
initial_elo["CHA"] = 1291
initial_elo["CHI"] = 1471
initial_elo["CLE"] = 1556
initial_elo["DAL"] = 1636
initial_elo["DEN"] = 1650
initial_elo["DET"] = 1287
initial_elo["GSW"] = 1603
initial_elo["HOU"] = 1531
initial_elo["IND"] = 1583
initial_elo["LAC"] = 1543
initial_elo["LAL"] = 1571
initial_elo["MEM"] = 1326
initial_elo["MIA"] = 1555
initial_elo["MIL"] = 1584
initial_elo["MIN"] = 1674
initial_elo["NOP"] = 1606
initial_elo["NYK"] = 1569
initial_elo["OKC"] = 1631
initial_elo["ORL"] = 1529
initial_elo["PHI"] = 1483
initial_elo["PHX"] = 1613
initial_elo["POR"] = 1300
initial_elo["SAC"] = 1546
initial_elo["SAS"] = 1372
initial_elo["TOR"] = 1291
initial_elo["UTA"] = 1365
initial_elo["WAS"] = 1282

In [105]:
##Sort games by date
df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
df = df.sort_values("GAME_DATE")
df = df.reset_index(drop=True) # drop old index
df

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,AVAILABLE_FLAG
0,2023-24,1610612756,PHX,Phoenix Suns,0022300062,2023-10-24,PHX @ GSW,W,48.0,42,...,1700,2104,1773,431,1388,1765,299,1564,957,1
1,2023-24,1610612743,DEN,Denver Nuggets,0022300061,2023-10-24,DEN vs. LAL,W,48.0,48,...,649,732,520,662,672,347,1217,797,458,1
2,2023-24,1610612744,GSW,Golden State Warriors,0022300062,2023-10-24,GSW vs. PHX,L,48.0,36,...,2190,505,190,662,1686,1923,425,1817,1336,1
3,2023-24,1610612747,LAL,Los Angeles Lakers,0022300061,2023-10-24,LAL @ DEN,L,48.0,41,...,1700,732,1773,1305,1388,901,1845,1623,1832,1
4,2023-24,1610612758,SAC,Sacramento Kings,0022300072,2023-10-25,SAC @ UTA,W,48.0,47,...,649,732,1435,256,131,2156,769,240,303,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2341,2023-24,1610612742,DAL,Dallas Mavericks,0022301131,2024-04-07,DAL vs. HOU,W,53.0,46,...,1971,1835,2002,662,1388,2224,1,13,516,1
2342,2023-24,1610612748,MIA,Miami Heat,0022301133,2024-04-07,MIA @ IND,L,48.0,39,...,1971,49,2177,1989,1388,1765,192,1063,1215,1
2343,2023-24,1610612739,CLE,Cleveland Cavaliers,0022301132,2024-04-07,CLE @ LAC,L,48.0,42,...,985,505,326,1989,131,503,425,860,1215,1
2344,2023-24,1610612745,HOU,Houston Rockets,0022301131,2024-04-07,HOU @ DAL,L,53.0,46,...,405,505,190,662,1388,2343,85,102,1783,1


In [106]:
# Functions to calculate and retrieve elos

# retrieve the previous elo
def get_prev_elo(team, date, season, stats_df, elo_df):
    # Get row of previous game
    prev_game = stats_df[(stats_df["GAME_DATE"] < date) & (stats_df["TEAM_ABBREVIATION"] == team)].sort_values(by="GAME_DATE").tail(1).iloc[0]
    
    # Extract the elo from that game
    elo_rating = elo_df[(elo_df["date"] == prev_game["GAME_DATE"]) & (elo_df["team"] == prev_game['TEAM_ABBREVIATION']) & (elo_df["team_opp"] == team_opponent(prev_game["GAME_ID"], prev_game["TEAM_ABBREVIATION"]))]['team_elo_after'].values[0]
    
    if prev_game["SEASON_YEAR"] != season:
        return (0.75 * elo_rating) + (0.25 * 1505) # This is how elo ratings are carried over to next season
    else:
        return elo_rating
    
def calculate_expected_win_probability(team_elo_rating, team_opp_elo_rating):
    E_team = 1./(1 + 10 ** ((team_opp_elo_rating - team_elo_rating) / (400.)))
    return E_team
    
def S_var(team_pts, opp_pts):
    S_team, S_opp = 0, 0
    
    if team_pts > opp_pts:
        S_team = 1
    elif opp_pts > team_pts:
        S_opp = 1
    else:
        S_team, S_opp = 0.5, 0.5
    
    return S_team, S_opp

# Compute the moving K constant from Silver's formula
def K_constant(MOV, elo_diff):
    K_0 = 20
    
    if MOV > 0:  # if "team" is the winner
        multiplier = ((MOV + 3)**(0.8)) / (7.5 + 0.006*(elo_diff))
    else:  # if "team_opp" is the winner
        multiplier = ((-MOV + 3)**(0.8)) / (7.5 + 0.006*(-elo_diff))  # note how we have to flip the elo_diff and make MOV positive
    
    return K_0 * multiplier


def update_elo(team_pts, opp_pts, team_elo_before, team_opp_elo_before, home):
    
    # In Silver's elo calculations, home advantage is accounted for by increasing the home team elo rating by 100 for the E and K calculations
    home_court_advantage = 100
    
    # Add the home court advantage (we will need to remove this later)
    if home == 1:
        team_elo_before += home_court_advantage
    else:
        team_opp_elo_before += home_court_advantage
    
    E_team = calculate_expected_win_probability(team_elo_before, team_opp_elo_before)
    E_team_opp = 1 - E_team
    
    elo_diff = team_elo_before - team_opp_elo_before
    
    # MOV = Margin of Victory
    MOV = team_pts - opp_pts
    
    # S variable in Silver's equation, value for was the winner
    S_team, S_opp = S_var(team_pts, opp_pts)
    
    # K constant
    K = K_constant(MOV, elo_diff)
    
    # Remove the home court advantage (we are done calculating the different variables, revert to true elo)
    if home == 1:
        team_elo_before -= home_court_advantage
    else:
        team_opp_elo_before -= home_court_advantage
    
    # Calculate the elos (which is a recursive formula)
    team_elo_after = team_elo_before + K * (S_team - E_team)
    team_opp_elo_after = team_opp_elo_before + K * (S_opp - E_team_opp)
    
    return team_elo_after, team_opp_elo_after

def team_opponent(game_id, team_we_know):
    # this function should spew out the opposing team
    df_opponent = df[(df['GAME_ID'] == game_id) & (df['TEAM_ABBREVIATION'] != team_we_know)]
    opponent = df_opponent['TEAM_ABBREVIATION'].iloc[0] 
    return opponent

def are_they_home(matchup):
    if "@" in matchup:
        return 0
    else:
        return 1
    
def team_opponent_pts(game_id, team_we_know):
    df_opponent = df[(df['GAME_ID'] == game_id) & (df['TEAM_ABBREVIATION'] != team_we_know)]
    team_pts = df_opponent['PTS'].iloc[0] 
    return team_pts
    

In [107]:
# Create DataFrames to store elo calculations for each game and elos for each team
elo_df = pd.DataFrame(columns=['date', 'season', 'team', 'team_opp', 'team_elo_before', 'team_opp_elo_before', 
                               'team_elo_after', 'team_opp_elo_after'])
# teams_elo_df = pd

row_count = 0

for index, row in df.iterrows():
    game_date = row["GAME_DATE"]
    season = row["SEASON_YEAR"]
    team = row["TEAM_ABBREVIATION"]
    
    team_opp = team_opponent(row["GAME_ID"], row["TEAM_ABBREVIATION"]) ##TODO: based on game_id, figure out who opponent is for that game
    is_this_team_home = are_they_home(row["MATCHUP"]) ##TODO: another function, using the matchup and '@' and vs. symbol to find which team was home
    
    team_pts = row["PTS"]
    opp_pts = team_opponent_pts(row["GAME_ID"], row["TEAM_ABBREVIATION"])##TODO: using game_id, find pts of opposing team
    
    # Check if we need to initialize the elo
    if (team not in elo_df["team"].values):
        team_starting_elo = initial_elo[team]
        team_elo_before = team_starting_elo
    else:
        team_elo_before = get_prev_elo(team, game_date, season, df, elo_df)
    
    if (team_opp not in elo_df["team_opp"].values):
        team_opp_starting_elo = initial_elo[team_opp]
        team_opp_elo_before = team_opp_starting_elo
    else:
        team_opp_elo_before = get_prev_elo(team_opp, game_date, season, df, elo_df)
    
    team_elo_after, team_opp_elo_after = update_elo(team_pts, opp_pts, team_elo_before, team_opp_elo_before, is_this_team_home)
    
    new_row_in_elo_df = {
        "date": game_date, 
        "season": season, 
        "team": team, 
        "team_opp": team_opp, 
        "team_elo_before": team_elo_before, 
        "team_opp_elo_before": team_opp_elo_before, 
        "team_elo_after": team_elo_after, 
        "team_opp_elo_after": team_opp_elo_after,
        "home": is_this_team_home
    }
    
    new_row = pd.DataFrame([new_row_in_elo_df])
    
    elo_df = pd.concat([elo_df, new_row], ignore_index=True)
    
    row_count += 1
    
    if(row_count % 1000 == 0):
        print(f"Processed {row_count}/{len(df)}")

  elo_df = pd.concat([elo_df, new_row], ignore_index=True)


Processed 1000/2346
Processed 2000/2346


In [108]:
elo_df

Unnamed: 0,date,season,team,team_opp,team_elo_before,team_opp_elo_before,team_elo_after,team_opp_elo_after,home
0,2023-10-24,2023-24,PHX,GSW,1613,1603,1621.541974,1594.458026,0.0
1,2023-10-24,2023-24,DEN,LAL,1650,1571,1655.354069,1565.645931,1.0
2,2023-10-24,2023-24,GSW,PHX,1603,1613,1594.458026,1621.541974,1.0
3,2023-10-24,2023-24,LAL,DEN,1571,1650,1565.645931,1655.354069,0.0
4,2023-10-25,2023-24,SAC,UTA,1546,1365,1556.179499,1354.820501,0.0
...,...,...,...,...,...,...,...,...,...
2341,2024-04-07,2023-24,DAL,HOU,1653.992249,1507.596093,1657.578038,1504.010304,1.0
2342,2024-04-07,2023-24,MIA,IND,1559.97416,1594.894924,1557.226304,1597.642779,0.0
2343,2024-04-07,2023-24,CLE,LAC,1536.971667,1562.655139,1534.103629,1565.523177,0.0
2344,2024-04-07,2023-24,HOU,DAL,1507.596093,1653.992249,1504.010304,1657.578038,0.0


In [135]:
def get_recent_elo(team_abbv):
    # Ensure the 'date' column is in the correct datetime format (if not already)
    elo_df['date'] = pd.to_datetime(elo_df['date'])
    
    # Filter the DataFrame for rows matching the specific team
    team_data = elo_df[elo_df["team"] == team_abbv]
    
    # Find the most recent date in the filtered DataFrame
    most_recent_date = team_data['date'].max()
    
    # Filter the DataFrame again for rows that match the most recent date for the team
    elo_df_now = team_data[team_data["date"] == most_recent_date]
    
    # Return the 'team_elo_after' value for the most recent date
    # Using .iloc[-1] to ensure we get the last (or only) record if there are multiple games on the most recent date
    return elo_df_now["team_elo_after"].iloc[0]
    

In [137]:
print(get_recent_elo("PHX"))

1625.4149437986496
