In [7]:
import pandas as pd
import numpy as np

In [8]:
# get data from Basketball-Reference
url = "https://www.basketball-reference.com/leagues/NBA_2023_games.html"
df = pd.read_html(url)[0]

In [9]:
df.head()

Unnamed: 0,Date,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Arena,Notes
0,"Tue, Oct 18, 2022",7:30p,Philadelphia 76ers,117,Boston Celtics,126,Box Score,,19156.0,TD Garden,
1,"Tue, Oct 18, 2022",10:00p,Los Angeles Lakers,109,Golden State Warriors,123,Box Score,,18064.0,Chase Center,
2,"Wed, Oct 19, 2022",7:00p,Orlando Magic,109,Detroit Pistons,113,Box Score,,20190.0,Little Caesars Arena,
3,"Wed, Oct 19, 2022",7:00p,Washington Wizards,114,Indiana Pacers,107,Box Score,,15027.0,Gainbridge Fieldhouse,
4,"Wed, Oct 19, 2022",7:30p,Houston Rockets,107,Atlanta Hawks,117,Box Score,,17878.0,State Farm Arena,


In [10]:
team_names = df['Visitor/Neutral'].unique() # numpy array of length 30

In [11]:
team_names

array(['Philadelphia 76ers', 'Los Angeles Lakers', 'Orlando Magic',
       'Washington Wizards', 'Houston Rockets', 'New Orleans Pelicans',
       'New York Knicks', 'Chicago Bulls', 'Cleveland Cavaliers',
       'Oklahoma City Thunder', 'Charlotte Hornets', 'Denver Nuggets',
       'Dallas Mavericks', 'Portland Trail Blazers', 'Milwaukee Bucks',
       'Los Angeles Clippers', 'San Antonio Spurs', 'Toronto Raptors',
       'Boston Celtics', 'Detroit Pistons', 'Memphis Grizzlies',
       'Utah Jazz', 'Phoenix Suns', 'Minnesota Timberwolves',
       'Sacramento Kings', 'Indiana Pacers', 'Brooklyn Nets',
       'Golden State Warriors', 'Atlanta Hawks', 'Miami Heat'],
      dtype=object)

In [6]:
# create dictionary of intiialized elo scores
INITIAL_ELO_SCORE = 1500
keys = team_names
values = INITIAL_ELO_SCORE*np.ones(len(keys))
elo_scores_dict = dict(zip(keys, values))

In [11]:
print(elo_scores_dict)

{'Philadelphia 76ers': 1500.0, 'Los Angeles Lakers': 1500.0, 'Orlando Magic': 1500.0, 'Washington Wizards': 1500.0, 'Houston Rockets': 1500.0, 'New Orleans Pelicans': 1500.0, 'New York Knicks': 1500.0, 'Chicago Bulls': 1500.0, 'Cleveland Cavaliers': 1500.0, 'Oklahoma City Thunder': 1500.0, 'Charlotte Hornets': 1500.0, 'Denver Nuggets': 1500.0, 'Dallas Mavericks': 1500.0, 'Portland Trail Blazers': 1500.0, 'Milwaukee Bucks': 1500.0, 'Los Angeles Clippers': 1500.0, 'San Antonio Spurs': 1500.0, 'Toronto Raptors': 1500.0, 'Boston Celtics': 1500.0, 'Detroit Pistons': 1500.0, 'Memphis Grizzlies': 1500.0, 'Utah Jazz': 1500.0, 'Phoenix Suns': 1500.0, 'Minnesota Timberwolves': 1500.0, 'Sacramento Kings': 1500.0, 'Indiana Pacers': 1500.0, 'Brooklyn Nets': 1500.0, 'Golden State Warriors': 1500.0, 'Atlanta Hawks': 1500.0, 'Miami Heat': 1500.0}


In [None]:
''' 
Now update scores
'''

In [12]:
# Example to show how we can isolate each row
for row in range(len(df)):
    
    print(df.iloc[row, :])
    
    break 

Date                Tue, Oct 18, 2022
Start (ET)                      7:30p
Visitor/Neutral    Philadelphia 76ers
PTS                               117
Home/Neutral           Boston Celtics
PTS.1                             126
Unnamed: 6                  Box Score
Unnamed: 7                        NaN
Attend.                       19156.0
Arena                       TD Garden
Notes                             NaN
Name: 0, dtype: object


In [16]:
# Function that calculates the expected score of a team
def calc_expected_score(team_rating: float, opponent_rating: float) -> float: 
    exponent = (opponent_rating - team_rating)/400
    return 1/(1 + np.power(10, exponent))

In [17]:
# initialize K value in elo algorithm
K = 32

# iterate over every row of dataframe
for row_index in range(len(df)):
    
    # store information for the game
    game_row = df.iloc[row_index, :]    # pandas series
    home_team = game_row['Home/Neutral']
    home_pts = game_row['PTS']
    away_team = game_row['Visitor/Neutral']
    away_pts = game_row['PTS.1']
    
    # get elo values of teams
    home_original_elo = elo_scores_dict[home_team]
    away_original_elo = elo_scores_dict[away_team]
    
    # calculate expected scores
    home_expected_score = calc_expected_score(home_original_elo, away_original_elo)
    away_expected_score = calc_expected_score(away_original_elo, home_original_elo)
    
    # Calculate new elo values
    if home_pts > away_pts:
        # HOME TEAM WON
        home_updated_elo = home_original_elo + K*(1 - home_expected_score)
        away_updated_elo = away_original_elo + K*(0 - away_expected_score)
    else:
        # AWAY TEAM WON
        home_updated_elo = home_original_elo + K*(0 - home_expected_score)
        away_updated_elo = away_original_elo + K*(1 - away_expected_score)
    
    # update ELO values
    elo_scores_dict[home_team] = home_updated_elo
    elo_scores_dict[away_team] = away_updated_elo
    

In [18]:
print(elo_scores_dict)

{'Philadelphia 76ers': 1491.472840891314, 'Los Angeles Lakers': 1550.2418486432596, 'Orlando Magic': 1565.3135930930694, 'Washington Wizards': 1522.461519556463, 'Houston Rockets': 1576.7865997783865, 'New Orleans Pelicans': 1470.3253289201298, 'New York Knicks': 1498.572188039835, 'Chicago Bulls': 1516.845010106631, 'Cleveland Cavaliers': 1438.9290488344946, 'Oklahoma City Thunder': 1493.5822661450911, 'Charlotte Hornets': 1518.3435657120367, 'Denver Nuggets': 1487.6197564301053, 'Dallas Mavericks': 1500.1518030069285, 'Portland Trail Blazers': 1447.730552206142, 'Milwaukee Bucks': 1415.3394400334469, 'Los Angeles Clippers': 1522.5174220744711, 'San Antonio Spurs': 1454.7771053065296, 'Toronto Raptors': 1488.85391844581, 'Boston Celtics': 1474.7297233771644, 'Detroit Pistons': 1552.1951392728454, 'Memphis Grizzlies': 1491.3798359096972, 'Utah Jazz': 1443.7600192668813, 'Phoenix Suns': 1439.365200851544, 'Minnesota Timberwolves': 1487.30693942636, 'Sacramento Kings': 1522.574193339895,

In [26]:
# How to sort dictionary in python by values
# https://stackoverflow.com/questions/613183/how-do-i-sort-a-dictionary-by-value

sorted_elo_list = [(k,v) for k, v in sorted(elo_scores_dict.items(), key=lambda item: item[1], reverse=True)]   # list of tuples


In [27]:
sorted_elo_list[0:5]

[('Houston Rockets', 1576.7865997783865),
 ('Orlando Magic', 1565.3135930930694),
 ('Detroit Pistons', 1552.1951392728454),
 ('Los Angeles Lakers', 1550.2418486432596),
 ('Miami Heat', 1542.6226899331364)]

In [29]:
def create_initial_elo(initial_elo_score, team_names):
    keys = team_names
    values = initial_elo_score*np.ones(len(keys))
    elo_scores_dict = dict(zip(keys, values))
    
    return elo_scores_dict

In [39]:
''' 
Function to update elo rankings. Updates in place
'''
def update_elo_rankings(df, K, elo_scores_dict):

    # iterate over every row of dataframe
    for row_index in range(len(df)):
        
        # store information for the game
        game_row = df.iloc[row_index, :]    # pandas series
        home_team = game_row['Home/Neutral']
        home_pts = game_row['PTS']
        away_team = game_row['Visitor/Neutral']
        away_pts = game_row['PTS.1']
        
        # get elo values of teams
        home_original_elo = elo_scores_dict[home_team]
        away_original_elo = elo_scores_dict[away_team]
        
        # calculate expected scores
        home_expected_score = calc_expected_score(home_original_elo, away_original_elo)
        away_expected_score = calc_expected_score(away_original_elo, home_original_elo)
        
        # Calculate new elo values
        if home_pts > away_pts:
            # HOME TEAM WON
            home_updated_elo = home_original_elo + K*(1 - home_expected_score)
            away_updated_elo = away_original_elo + K*(0 - away_expected_score)
        else:
            # AWAY TEAM WON
            home_updated_elo = home_original_elo + K*(0 - home_expected_score)
            away_updated_elo = away_original_elo + K*(1 - away_expected_score)
        
        # update ELO values
        elo_scores_dict[home_team] = home_updated_elo
        elo_scores_dict[away_team] = away_updated_elo
    
    return elo_scores_dict

In [42]:
elo_dict_2 = create_initial_elo(1500, team_names)
elo_dict_2 = update_elo_rankings(df, 32, elo_dict_2)

In [43]:
elo_dict_2

{'Philadelphia 76ers': 1491.472840891314,
 'Los Angeles Lakers': 1550.2418486432596,
 'Orlando Magic': 1565.3135930930694,
 'Washington Wizards': 1522.461519556463,
 'Houston Rockets': 1576.7865997783865,
 'New Orleans Pelicans': 1470.3253289201298,
 'New York Knicks': 1498.572188039835,
 'Chicago Bulls': 1516.845010106631,
 'Cleveland Cavaliers': 1438.9290488344946,
 'Oklahoma City Thunder': 1493.5822661450911,
 'Charlotte Hornets': 1518.3435657120367,
 'Denver Nuggets': 1487.6197564301053,
 'Dallas Mavericks': 1500.1518030069285,
 'Portland Trail Blazers': 1447.730552206142,
 'Milwaukee Bucks': 1415.3394400334469,
 'Los Angeles Clippers': 1522.5174220744711,
 'San Antonio Spurs': 1454.7771053065296,
 'Toronto Raptors': 1488.85391844581,
 'Boston Celtics': 1474.7297233771644,
 'Detroit Pistons': 1552.1951392728454,
 'Memphis Grizzlies': 1491.3798359096972,
 'Utah Jazz': 1443.7600192668813,
 'Phoenix Suns': 1439.365200851544,
 'Minnesota Timberwolves': 1487.30693942636,
 'Sacramento Ki