# Elo Calculations Notebook

In this notebook, we will devise a way to quickly compute the most up-to-date elo rating for a given team.

Additionally, we will need to find a way to find to find the nearest future games so we may predict the winner. 

In [17]:
# Import statements
import pandas as pd
from datetime import datetime
from nba_api.stats.static import teams

from nba_api.stats.endpoints import teamgamelogs
from nba_api.stats.endpoints import teaminfocommon

In [18]:
teamGameLogs = teamgamelogs.TeamGameLogs(season_nullable='2023-24')
df = teamGameLogs.get_data_frames()[0]

df

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,AVAILABLE_FLAG
0,2023-24,1610612753,ORL,Orlando Magic,0022301191,2024-04-14T00:00:00,ORL vs. MIL,W,48.0,42,...,1031,1228,205,1003,710,968,1254,1243,127,1
1,2023-24,1610612759,SAS,San Antonio Spurs,0022301197,2024-04-14T00:00:00,SAS vs. DET,W,48.0,49,...,247,1228,553,1752,382,737,2439,583,88,1
2,2023-24,1610612761,TOR,Toronto Raptors,0022301189,2024-04-14T00:00:00,TOR @ MIA,L,48.0,38,...,1776,2204,1516,1752,1772,1672,1725,1977,2062,1
3,2023-24,1610612746,LAC,LA Clippers,0022301199,2024-04-14T00:00:00,LAC vs. HOU,L,48.0,41,...,1776,760,59,267,2012,250,2212,1840,1864,1
4,2023-24,1610612740,NOP,New Orleans Pelicans,0022301195,2024-04-14T00:00:00,NOP vs. LAL,L,48.0,41,...,1241,2204,839,2322,382,737,2212,1635,2096,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,2023-24,1610612758,SAC,Sacramento Kings,0022300072,2023-10-25T00:00:00,SAC @ UTA,W,48.0,47,...,678,760,1516,267,140,2267,790,247,321,1
2456,2023-24,1610612756,PHX,Phoenix Suns,0022300062,2023-10-24T00:00:00,PHX @ GSW,W,48.0,42,...,1776,2204,1865,450,1459,1866,304,1635,1003,1
2457,2023-24,1610612744,GSW,Golden State Warriors,0022300062,2023-10-24T00:00:00,GSW vs. PHX,L,48.0,36,...,2294,530,205,690,1772,2030,432,1906,1402,1
2458,2023-24,1610612747,LAL,Los Angeles Lakers,0022300061,2023-10-24T00:00:00,LAL @ DEN,L,48.0,41,...,1776,760,1865,1367,1459,968,1915,1697,1915,1


In [19]:
# Set initial Elo ratings manually

initial_elo = dict()  # Create a dictionary to hold initial elos

# ELO RATINGS AT START OF 2023-24 SEASON, from https://neilpaine.substack.com/p/2023-24-nba-elo-ratings
initial_elo["ATL"] = 1500
initial_elo["BKN"] = 1431
initial_elo["BOS"] = 1771
initial_elo["CHA"] = 1291
initial_elo["CHI"] = 1471
initial_elo["CLE"] = 1556
initial_elo["DAL"] = 1636
initial_elo["DEN"] = 1650
initial_elo["DET"] = 1287
initial_elo["GSW"] = 1603
initial_elo["HOU"] = 1531
initial_elo["IND"] = 1583
initial_elo["LAC"] = 1543
initial_elo["LAL"] = 1571
initial_elo["MEM"] = 1326
initial_elo["MIA"] = 1555
initial_elo["MIL"] = 1584
initial_elo["MIN"] = 1674
initial_elo["NOP"] = 1606
initial_elo["NYK"] = 1569
initial_elo["OKC"] = 1631
initial_elo["ORL"] = 1529
initial_elo["PHI"] = 1483
initial_elo["PHX"] = 1613
initial_elo["POR"] = 1300
initial_elo["SAC"] = 1546
initial_elo["SAS"] = 1372
initial_elo["TOR"] = 1291
initial_elo["UTA"] = 1365
initial_elo["WAS"] = 1282

In [20]:
##Sort games by date
df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
df = df.sort_values("GAME_DATE")
df = df.reset_index(drop=True) # drop old index
df

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,AVAILABLE_FLAG
0,2023-24,1610612743,DEN,Denver Nuggets,0022300061,2023-10-24,DEN vs. LAL,W,48.0,48,...,678,760,553,690,710,381,1254,837,485,1
1,2023-24,1610612756,PHX,Phoenix Suns,0022300062,2023-10-24,PHX @ GSW,W,48.0,42,...,1776,2204,1865,450,1459,1866,304,1635,1003,1
2,2023-24,1610612747,LAL,Los Angeles Lakers,0022300061,2023-10-24,LAL @ DEN,L,48.0,41,...,1776,760,1865,1367,1459,968,1915,1697,1915,1
3,2023-24,1610612744,GSW,Golden State Warriors,0022300062,2023-10-24,GSW vs. PHX,L,48.0,36,...,2294,530,205,690,1772,2030,432,1906,1402,1
4,2023-24,1610612758,SAC,Sacramento Kings,0022300072,2023-10-25,SAC @ UTA,W,48.0,47,...,678,760,1516,267,140,2267,790,247,321,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,2023-24,1610612737,ATL,Atlanta Hawks,0022301188,2024-04-14,ATL @ IND,L,48.0,39,...,1416,1920,1516,1003,1459,78,195,1114,2446,1
2456,2023-24,1610612745,HOU,Houston Rockets,0022301199,2024-04-14,HOU @ LAC,W,48.0,50,...,423,2077,1161,267,2012,143,2080,1044,547,1
2457,2023-24,1610612754,IND,Indiana Pacers,0022301188,2024-04-14,IND vs. ATL,W,48.0,65,...,7,1720,351,690,1096,2158,2319,1,15,1
2458,2023-24,1610612763,MEM,Memphis Grizzlies,0022301193,2024-04-14,MEM vs. DEN,L,48.0,45,...,1932,991,553,2080,1096,968,1494,1413,2062,1


In [21]:
# Functions to calculate and retrieve elos

# retrieve the previous elo
def get_prev_elo(team, date, season, stats_df, elo_df):
    # Get row of previous game
    prev_game = stats_df[(stats_df["GAME_DATE"] < date) & (stats_df["TEAM_ABBREVIATION"] == team)].sort_values(by="GAME_DATE").tail(1).iloc[0]
    
    # Extract the elo from that game
    elo_rating = elo_df[(elo_df["date"] == prev_game["GAME_DATE"]) & (elo_df["team"] == prev_game['TEAM_ABBREVIATION']) & (elo_df["team_opp"] == team_opponent(prev_game["GAME_ID"], prev_game["TEAM_ABBREVIATION"]))]['team_elo_after'].values[0]
    
    if prev_game["SEASON_YEAR"] != season:
        return (0.75 * elo_rating) + (0.25 * 1505) # This is how elo ratings are carried over to next season
    else:
        return elo_rating
    
def calculate_expected_win_probability(team_elo_rating, team_opp_elo_rating):
    E_team = 1./(1 + 10 ** ((team_opp_elo_rating - team_elo_rating) / (400.)))
    return E_team
    
def S_var(team_pts, opp_pts):
    S_team, S_opp = 0, 0
    
    if team_pts > opp_pts:
        S_team = 1
    elif opp_pts > team_pts:
        S_opp = 1
    else:
        S_team, S_opp = 0.5, 0.5
    
    return S_team, S_opp

# Compute the moving K constant from Silver's formula
def K_constant(MOV, elo_diff):
    K_0 = 20
    
    if MOV > 0:  # if "team" is the winner
        multiplier = ((MOV + 3)**(0.8)) / (7.5 + 0.006*(elo_diff))
    else:  # if "team_opp" is the winner
        multiplier = ((-MOV + 3)**(0.8)) / (7.5 + 0.006*(-elo_diff))  # note how we have to flip the elo_diff and make MOV positive
    
    return K_0 * multiplier


def update_elo(team_pts, opp_pts, team_elo_before, team_opp_elo_before, home):
    
    # In Silver's elo calculations, home advantage is accounted for by increasing the home team elo rating by 100 for the E and K calculations
    home_court_advantage = 100
    
    # Add the home court advantage (we will need to remove this later)
    if home == 1:
        team_elo_before += home_court_advantage
    else:
        team_opp_elo_before += home_court_advantage
    
    E_team = calculate_expected_win_probability(team_elo_before, team_opp_elo_before)
    E_team_opp = 1 - E_team
    
    elo_diff = team_elo_before - team_opp_elo_before
    
    # MOV = Margin of Victory
    MOV = team_pts - opp_pts
    
    # S variable in Silver's equation, value for was the winner
    S_team, S_opp = S_var(team_pts, opp_pts)
    
    # K constant
    K = K_constant(MOV, elo_diff)
    
    # Remove the home court advantage (we are done calculating the different variables, revert to true elo)
    if home == 1:
        team_elo_before -= home_court_advantage
    else:
        team_opp_elo_before -= home_court_advantage
    
    # Calculate the elos (which is a recursive formula)
    team_elo_after = team_elo_before + K * (S_team - E_team)
    team_opp_elo_after = team_opp_elo_before + K * (S_opp - E_team_opp)
    
    return team_elo_after, team_opp_elo_after

def team_opponent(game_id, team_we_know):
    # this function should spew out the opposing team
    df_opponent = df[(df['GAME_ID'] == game_id) & (df['TEAM_ABBREVIATION'] != team_we_know)]
    opponent = df_opponent['TEAM_ABBREVIATION'].iloc[0] 
    return opponent

def are_they_home(matchup):
    if "@" in matchup:
        return 0
    else:
        return 1
    
def team_opponent_pts(game_id, team_we_know):
    df_opponent = df[(df['GAME_ID'] == game_id) & (df['TEAM_ABBREVIATION'] != team_we_know)]
    team_pts = df_opponent['PTS'].iloc[0] 
    return team_pts
    

In [22]:
# Create DataFrames to store elo calculations for each game and elos for each team
elo_df = pd.DataFrame(columns=['date', 'season', 'team', 'team_opp', 'team_elo_before', 'team_opp_elo_before', 
                               'team_elo_after', 'team_opp_elo_after'])
# teams_elo_df = pd

row_count = 0

for index, row in df.iterrows():
    game_date = row["GAME_DATE"]
    season = row["SEASON_YEAR"]
    team = row["TEAM_ABBREVIATION"]
    
    team_opp = team_opponent(row["GAME_ID"], row["TEAM_ABBREVIATION"]) ##TODO: based on game_id, figure out who opponent is for that game
    is_this_team_home = are_they_home(row["MATCHUP"]) ##TODO: another function, using the matchup and '@' and vs. symbol to find which team was home
    
    team_pts = row["PTS"]
    opp_pts = team_opponent_pts(row["GAME_ID"], row["TEAM_ABBREVIATION"])##TODO: using game_id, find pts of opposing team
    
    # Check if we need to initialize the elo
    if (team not in elo_df["team"].values):
        team_starting_elo = initial_elo[team]
        team_elo_before = team_starting_elo
    else:
        team_elo_before = get_prev_elo(team, game_date, season, df, elo_df)
    
    if (team_opp not in elo_df["team_opp"].values):
        team_opp_starting_elo = initial_elo[team_opp]
        team_opp_elo_before = team_opp_starting_elo
    else:
        team_opp_elo_before = get_prev_elo(team_opp, game_date, season, df, elo_df)
    
    team_elo_after, team_opp_elo_after = update_elo(team_pts, opp_pts, team_elo_before, team_opp_elo_before, is_this_team_home)
    
    new_row_in_elo_df = {
        "date": game_date, 
        "season": season, 
        "team": team, 
        "team_opp": team_opp, 
        "team_elo_before": team_elo_before, 
        "team_opp_elo_before": team_opp_elo_before, 
        "team_elo_after": team_elo_after, 
        "team_opp_elo_after": team_opp_elo_after,
        "home": is_this_team_home
    }
    
    new_row = pd.DataFrame([new_row_in_elo_df])
    
    elo_df = pd.concat([elo_df, new_row], ignore_index=True)
    
    row_count += 1
    
    if(row_count % 1000 == 0):
        print(f"Processed {row_count}/{len(df)}")

  elo_df = pd.concat([elo_df, new_row], ignore_index=True)


Processed 1000/2460
Processed 2000/2460


In [23]:
elo_df

Unnamed: 0,date,season,team,team_opp,team_elo_before,team_opp_elo_before,team_elo_after,team_opp_elo_after,home
0,2023-10-24,2023-24,DEN,LAL,1650,1571,1655.354069,1565.645931,1.0
1,2023-10-24,2023-24,PHX,GSW,1613,1603,1621.541974,1594.458026,0.0
2,2023-10-24,2023-24,LAL,DEN,1571,1650,1565.645931,1655.354069,0.0
3,2023-10-24,2023-24,GSW,PHX,1603,1613,1594.458026,1621.541974,1.0
4,2023-10-25,2023-24,SAC,UTA,1546,1365,1556.179499,1354.820501,0.0
...,...,...,...,...,...,...,...,...,...
2455,2024-04-14,2023-24,ATL,IND,1466.524587,1594.995107,1456.497037,1605.022657,0.0
2456,2024-04-14,2023-24,HOU,LAC,1512.394595,1557.937333,1529.792229,1540.539698,0.0
2457,2024-04-14,2023-24,IND,ATL,1594.995107,1466.524587,1605.022657,1456.497037,1.0
2458,2024-04-14,2023-24,MEM,DEN,1308.251101,1667.94268,1304.166944,1672.026836,1.0


In [24]:
def get_recent_elo(team_abbv):
    # Ensure the 'date' column is in the correct datetime format (if not already)
    elo_df['date'] = pd.to_datetime(elo_df['date'])
    
    # Filter the DataFrame for rows matching the specific team
    team_data = elo_df[elo_df["team"] == team_abbv]
    
    # Find the most recent date in the filtered DataFrame
    most_recent_date = team_data['date'].max()
    
    # Filter the DataFrame again for rows that match the most recent date for the team
    elo_df_now = team_data[team_data["date"] == most_recent_date]
    
    # Return the 'team_elo_after' value for the most recent date
    # Using .iloc[-1] to ensure we get the last (or only) record if there are multiple games on the most recent date
    return elo_df_now["team_elo_after"].iloc[0]
    
# Silver's point spread formula
def point_spread(team_elo, opp_elo):
    elo_diff = team_elo - opp_elo
    return elo_diff / 28.


# Formula for season wins from polynomial regression
def project_season_wins(elo):
    return (-0.000000185006724245061 * (elo ** 3) + 0.000835470379387845 * (elo ** 2) - 1.15355230436639 * (
        elo) + 515.526317931045)

In [25]:
print(get_recent_elo("MIA"))

1554.9637714704277


In [28]:
#spews dataframe of 1 row with stats that include: teams, winner, point differential, what percantage they have of winning per team

#function for fantasy games (we cant use home)
def get_winner(team_abbv1, team_abbv2):
    team_elo_1 = get_recent_elo(team_abbv1)
    team_elo_2 = get_recent_elo(team_abbv2)
    print(team_abbv1, team_elo_1, team_abbv2, team_elo_2)
    winner = team_abbv1
    if team_elo_1 < team_elo_2:
        winner = team_abbv2
        
    win_prob =  calculate_expected_win_probability(team_elo_1, team_elo_2)
    point_spread_new = point_spread(team_elo_1, team_elo_2)
    data = {'Team1':[team_abbv1], 'Team2':[team_abbv2], 'Expected_Winner':[winner], 'Winner_Probability':[win_prob], 'Point_Spread':[point_spread_new]}
    new_df = pd.DataFrame(data)
    return new_df 

In [29]:
print(get_winner("MIN", "PHX"))

MIN 1666.3041633028497 PHX 1654.35241243539
  Team1 Team2 Expected_Winner  Winner_Probability  Point_Spread
0   MIN   PHX             MIN            0.517193      0.426848
