In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import pickle
from datetime import datetime

# plotting
import plotly.graph_objects as go 
import ast


 First step - grab the schedule! Each month's schedule is stored on a separate page, so we'll check each one based on the current date.

In [2]:
def get_schedule():

    months = ['october','november','december','january','february','march','april']
    current_month = datetime.now().strftime("%B").lower()
    
    schedule = pd.DataFrame()

    for month in months:
        url = f"https://www.basketball-reference.com/leagues/NBA_2025_games-{month}.html"

        # Add headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        try:
            # Make the request with a slight delay to be respectful to the server
            time.sleep(3)
            response = requests.get(url, headers=headers)
            response.raise_for_status()  # Raise an exception for bad status codes
            
            # Parse the HTML content
            soup = BeautifulSoup(response.text, 'html.parser')
            
            # Find the schedule table
            table = soup.find('table', id='schedule')
            
            if table is None:
                raise ValueError("Schedule table not found on the page")

            # Define the columns we want to extract
            columns = [
                'Date',
                'Start (ET)',
                'Visitor/Neutral',
                'away_pts',
                'Home/Neutral',
                'home_pts',
                'Box Score',
                'OT',
                'Attend.',
                'LOG',
                'Arena',
                'Notes'
            ]
            
            # Extract rows
            rows = []
            for row in table.find_all('tr')[1:]:  # Skip header row
                game_data = []
                cells = row.find_all(['td', 'th'])
                if cells:  # Only process rows with data
                    for cell in cells:
                        text = cell.text.strip()
                        game_data.append(text)
                    if game_data:  # Only append non-empty rows
                        rows.append(game_data)
            
            # Create DataFrame with explicit column names
            df = pd.DataFrame(rows, columns=columns)
            df = df[df['away_pts'] != '']
            schedule = pd.concat((schedule, df))
            
            if month == current_month:
                break
                



        except requests.RequestException as e:
            print(f"Error fetching the webpage: {e}")
        except Exception as e:
            print(f"Error processing the data: {e}")

    # Optionally save to CSV
    schedule.to_csv('2025_schedule.csv', index=False)
    print("\nData has been saved to '2025_schedule.csv'")
    return schedule

schedule = get_schedule()


Data has been saved to '2025_schedule.csv'


Now we calculate the Elo of each team game-by-game:

In [42]:
def expected_score(rating_a, rating_b):
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(rating, expected, actual, k=20):
    """
    Update the Elo rating based on game result.
    :param rating: Current Elo rating
    :param expected: Expected score
    :param actual: Actual score (1 for win, 0 for loss)
    :param k: K-factor
    :return: Updated Elo rating
    """
    return rating + k * (actual - expected)


# Load Elos from end of previous season
with open('../2023-24/final_elos.pkl', 'rb') as f:
    current_elos = pickle.load(f) 

elo_histories = {}
team_names = list(schedule['Visitor/Neutral'].unique())
for name in team_names:
    elo_histories[name] = [current_elos[name]]

In [43]:
def calculate_elos(schedule, current_elos, elo_histories):
    '''
    schedule: pd.Dataframe containing game logs
    
    current_elos: {str: int} form dictionary showing each team's current Elo
    
    elo_histories: {str: [int]} form dictionary showing all Elo values throughout the season 
    (used for plotting later on)
    '''

    for index, row in schedule.iterrows(): # i know iterrows sucks and vectors are better,
                                    # but it's sequential data that's updated over time 

        away = row['Visitor/Neutral']
        away_score = int(row['away_pts'])
        home = row['Home/Neutral']
        home_score = int(row['home_pts'])
        print(home_score, away_score)
        home_elo = current_elos[home]
        away_elo = current_elos[away]

        if home_score > away_score:
            print("home team wins")
            print(home, 'beats', away, home_score, 'to', away_score)
            result = 1
        else:
            print("away team wins")
            print(away, 'beats', home, away_score, 'to', home_score)
            result = 0

        
        
        expected_home_score = expected_score(current_elos[home], current_elos[away])
        print('ehome:', expected_home_score)
        expected_away_score = expected_score(current_elos[away], current_elos[home])
        print('eaway:', expected_away_score)
        new_rating_home = update_elo(home_elo, expected_home_score, result)
        new_rating_away = update_elo(away_elo, expected_away_score, 1 - result)

        elo_histories[home].append(new_rating_home)
        elo_histories[away].append(new_rating_away)

        current_elos[home] = new_rating_home
        current_elos[away] = new_rating_away
    
    return current_elos, elo_histories

In [44]:
current_elos, elo_histories = calculate_elos(schedule, current_elos, elo_histories)
elo_histories

132 109
home team wins
Boston Celtics beats New York Knicks 132 to 109
ehome: 0.6340479064247586
eaway: 0.36595209357524144
110 103
home team wins
Los Angeles Lakers beats Minnesota Timberwolves 110 to 103
ehome: 0.4657777957787788
eaway: 0.5342222042212211
109 115
away team wins
Indiana Pacers beats Detroit Pistons 115 to 109
ehome: 0.17567908160629842
eaway: 0.8243209183937016
120 116
home team wins
Atlanta Hawks beats Brooklyn Nets 120 to 116
ehome: 0.5638802417792655
eaway: 0.4361197582207344
97 116
away team wins
Orlando Magic beats Miami Heat 116 to 97
ehome: 0.5112588941935251
eaway: 0.48874110580647495
109 124
away team wins
Milwaukee Bucks beats Philadelphia 76ers 124 to 109
ehome: 0.5088768417783281
eaway: 0.49112315822167196
106 136
away team wins
Cleveland Cavaliers beats Toronto Raptors 136 to 106
ehome: 0.25024087116036825
eaway: 0.7497591288396317
105 110
away team wins
Charlotte Hornets beats Houston Rockets 110 to 105
ehome: 0.7174997030414094
eaway: 0.2825002969585906

{'New York Knicks': [1584.711486893246,
  1577.3924450217412,
  1587.0328727564695,
  1575.4472350744682,
  1584.3491247154222,
  1587.538459571707,
  1575.2221808673369,
  1561.4969052278495],
 'Minnesota Timberwolves': [1607.4606388711322,
  1596.7761947867077,
  1604.835233202691,
  1608.3770782156566,
  1598.1244753089975,
  1608.7853915039536,
  1593.4975473430454,
  1597.5786190138465,
  1604.1168506440956],
 'Indiana Pacers': [1561.380655075586,
  1564.894236707712,
  1555.2538089729837,
  1543.9638154362176,
  1533.6240225945237,
  1547.9944124293731,
  1538.0315056644272,
  1549.9263956917391,
  1558.7276517799571],
 'Brooklyn Nets': [1410.8879283939752,
  1402.1655332295607,
  1395.9480280632931,
  1409.5743751885934,
  1404.9124932232166,
  1414.3347923719932,
  1426.894679818537,
  1413.2061924581733,
  1422.8921935818075],
 'Orlando Magic': [1530.2250287664333,
  1540.4502066503037,
  1546.6677118165712,
  1532.1536562407475,
  1542.4934490824414,
  1531.1040901100903,
  1

In [45]:
from plotting import elo_bar_plot, elo_line_plot, elo_delta_plot

fig = elo_bar_plot(current_elos)
fig.show()

In [46]:
fig = elo_line_plot(elo_histories)
fig.show()

In [47]:
elo_deltas = {name: 0 for name in team_names}

for name in team_names:
    history = elo_histories[name]
    delta = history[-1] - history[0]
    elo_deltas[name] = delta
    
elo_deltas

{'New York Knicks': -23.21458166539651,
 'Minnesota Timberwolves': -3.3437882270366117,
 'Indiana Pacers': -2.653003295628878,
 'Brooklyn Nets': 12.004265187832289,
 'Orlando Magic': -30.949751461639835,
 'Milwaukee Bucks': -42.854294848860036,
 'Cleveland Cavaliers': 69.95673252392498,
 'Charlotte Hornets': 10.643481497446146,
 'Chicago Bulls': -22.80995868942159,
 'Memphis Grizzlies': 33.56730218603866,
 'Phoenix Suns': 43.63724985529757,
 'Golden State Warriors': 40.75679868418433,
 'Boston Celtics': -1.2714604109639822,
 'San Antonio Spurs': 16.57797612183026,
 'Oklahoma City Thunder': 29.05949660667784,
 'Philadelphia 76ers': -57.75079667969567,
 'Detroit Pistons': 20.925767241011044,
 'New Orleans Pelicans': -51.691203805151645,
 'Los Angeles Clippers': -3.1636837565140468,
 'Miami Heat': -22.664532090637522,
 'Toronto Raptors': -2.244928343566471,
 'Houston Rockets': 11.588398170698383,
 'Dallas Mavericks': -0.9937369277422476,
 'Sacramento Kings': 3.156160787536919,
 'Atlanta H

In [48]:
fig = elo_delta_plot(elo_deltas)
fig.show()

In [33]:
team = 'Miami Heat'
schedule[(schedule['Visitor/Neutral'] == team) | (schedule['Home/Neutral'] == team)]

Unnamed: 0,Date,Start (ET),Visitor/Neutral,away_pts,Home/Neutral,home_pts,Box Score,OT,Attend.,LOG,Arena,Notes
4,"Wed, Oct 23, 2024",7:30p,Orlando Magic,116,Miami Heat,97,Box Score,,19630,2:31,Kaseya Center,
29,"Sat, Oct 26, 2024",7:00p,Miami Heat,114,Charlotte Hornets,106,Box Score,,19102,2:23,Spectrum Center,
45,"Mon, Oct 28, 2024",7:30p,Detroit Pistons,98,Miami Heat,106,Box Score,,19626,2:18,Kaseya Center,
61,"Wed, Oct 30, 2024",7:30p,New York Knicks,116,Miami Heat,107,Box Score,,19620,2:10,Kaseya Center,
15,"Sat, Nov 2, 2024",9:30p,Miami Heat,118,Washington Wizards,98,Box Score,,20328,2:16,Mexico City Arena,
27,"Mon, Nov 4, 2024",8:15p,Sacramento Kings,111,Miami Heat,110,Box Score,,19604,2:20,Kaseya Center,
46,"Wed, Nov 6, 2024",9:00p,Miami Heat,112,Phoenix Suns,115,Box Score,,17071,2:20,Footprint Center,


In [14]:
elo_histories['Boston Celtics']

[1680.1906216232785,
 1687.5096634947834,
 1689.331437583457,
 1691.11059831677,
 1696.4198310669194,
 1682.4861888083092,
 1685.227860493281,
 1687.8957072623814,
 1671.7986924846837,
 1658.876543640449]