In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import pickle
from datetime import datetime

# plotting
import plotly.graph_objects as go 
import ast


 First step - grab the schedule! Each month's schedule is stored on a separate page, so we'll check each one based on the current date.

In [2]:
def get_schedule():

    months = ['october','november','december','january','february','march','april']
    current_month = datetime.now().strftime("%B").lower()
    
    schedule = pd.DataFrame()

    for month in months:
        url = f"https://www.basketball-reference.com/leagues/NBA_2025_games-{month}.html"

        # Add headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        try:
            # Make the request with a slight delay to be respectful to the server
            time.sleep(3)
            response = requests.get(url, headers=headers)
            response.raise_for_status()  # Raise an exception for bad status codes
            
            # Parse the HTML content
            soup = BeautifulSoup(response.text, 'html.parser')
            
            # Find the schedule table
            table = soup.find('table', id='schedule')
            
            if table is None:
                raise ValueError("Schedule table not found on the page")

            # Define the columns we want to extract
            columns = [
                'Date',
                'Start (ET)',
                'Visitor/Neutral',
                'away_pts',
                'Home/Neutral',
                'home_pts',
                'Box Score',
                'OT',
                'Attend.',
                'LOG',
                'Arena',
                'Notes'
            ]
            
            # Extract rows
            rows = []
            for row in table.find_all('tr')[1:]:  # Skip header row
                game_data = []
                cells = row.find_all(['td', 'th'])
                if cells:  # Only process rows with data
                    for cell in cells:
                        text = cell.text.strip()
                        game_data.append(text)
                    if game_data:  # Only append non-empty rows
                        rows.append(game_data)
            
            # Create DataFrame with explicit column names
            df = pd.DataFrame(rows, columns=columns)
            df = df[df['away_pts'] != '']
            schedule = pd.concat((schedule, df))
            
            if month == current_month:
                break
                



        except requests.RequestException as e:
            print(f"Error fetching the webpage: {e}")
        except Exception as e:
            print(f"Error processing the data: {e}")

    # Optionally save to CSV
    schedule.to_csv('2025_schedule.csv', index=False)
    print("\nData has been saved to '2025_schedule.csv'")
    return schedule

schedule = get_schedule()


Data has been saved to '2025_schedule.csv'


Now we calculate the Elo of each team game-by-game:

In [3]:
def expected_score(rating_a, rating_b):
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(rating, expected, actual, k=20):
    """
    Update the Elo rating based on game result.
    :param rating: Current Elo rating
    :param expected: Expected score
    :param actual: Actual score (1 for win, 0 for loss)
    :param k: K-factor
    :return: Updated Elo rating
    """
    return rating + k * (actual - expected)


# Load Elos from end of previous season
with open('../2023-24/final_elos.pkl', 'rb') as f:
    current_elos = pickle.load(f) 

elo_histories = {}
team_names = list(schedule['Visitor/Neutral'].unique())
for name in team_names:
    elo_histories[name] = [current_elos[name]]

In [4]:
def calculate_elos(schedule, current_elos, elo_histories):
    '''
    schedule: pd.Dataframe containing game logs
    
    current_elos: {str: int} form dictionary showing each team's current Elo
    
    elo_histories: {str: [int]} form dictionary showing all Elo values throughout the season 
    (used for plotting later on)
    '''

    for index, row in schedule.iterrows(): # i know iterrows sucks and vectors are better,
                                    # but it's sequential data that's updated over time 

        away = row['Visitor/Neutral']
        away_score = int(row['away_pts'])
        home = row['Home/Neutral']
        home_score = int(row['home_pts'])
        home_elo = current_elos[home]
        away_elo = current_elos[away]

        if home_score > away_score:
            result = 1
        else:
            result = 0

        
        
        expected_home_score = expected_score(current_elos[home], current_elos[away])
        expected_away_score = expected_score(current_elos[away], current_elos[home])
        new_rating_home = update_elo(home_elo, expected_home_score, result)
        new_rating_away = update_elo(away_elo, expected_away_score, 1 - result)

        elo_histories[home].append(new_rating_home)
        elo_histories[away].append(new_rating_away)

        current_elos[home] = new_rating_home
        current_elos[away] = new_rating_away
    
    return current_elos, elo_histories

In [5]:
current_elos, elo_histories = calculate_elos(schedule, current_elos, elo_histories)
elo_histories

{'New York Knicks': [1584.711486893246,
  1577.3924450217412,
  1587.0328727564695,
  1575.4472350744682,
  1584.3491247154222,
  1587.538459571707,
  1575.2221808673369,
  1561.4969052278495,
  1569.3737008535081,
  1558.6333430491193,
  1566.3432979934373,
  1553.596351306668,
  1559.9980255193975,
  1566.083243727928,
  1569.124906693682,
  1579.9045686999918,
  1564.3092816278106,
  1575.855002283505,
  1566.2554381385053,
  1570.858270876248,
  1577.4033417997186],
 'Minnesota Timberwolves': [1607.4606388711322,
  1596.7761947867077,
  1604.835233202691,
  1608.3770782156566,
  1598.1244753089975,
  1608.7853915039536,
  1593.4975473430454,
  1597.5786190138465,
  1604.1168506440956,
  1607.8549464797095,
  1595.0720470023327,
  1578.7628461583001,
  1563.05247860449,
  1572.2656044295347,
  1583.6188490930217,
  1567.1542596934512,
  1560.4808043909313,
  1550.0281819182057,
  1538.3944134523535,
  1549.6430895393296],
 'Indiana Pacers': [1561.380655075586,
  1564.894236707712,
 

In [6]:
from plotting import elo_bar_plot, elo_line_plot, elo_delta_plot

fig = elo_bar_plot(current_elos)
fig.show()

In [7]:
fig = elo_line_plot(elo_histories)
fig.show()

In [8]:
elo_deltas = {name: 0 for name in team_names}

for name in team_names:
    history = elo_histories[name]
    delta = history[-1] - history[0]
    elo_deltas[name] = delta
    
elo_deltas

{'New York Knicks': -7.308145093527401,
 'Minnesota Timberwolves': -57.81754933180264,
 'Indiana Pacers': -62.55743155658024,
 'Brooklyn Nets': 27.815033710354328,
 'Orlando Magic': 52.9879675287641,
 'Milwaukee Bucks': -3.87695236723539,
 'Cleveland Cavaliers': 93.1957396600028,
 'Charlotte Hornets': -6.536076758795161,
 'Chicago Bulls': -32.596106737873924,
 'Memphis Grizzlies': 106.77390700508522,
 'Phoenix Suns': -7.983560076386993,
 'Golden State Warriors': 6.093843859823892,
 'Boston Celtics': 11.804811511269236,
 'San Antonio Spurs': 64.86146371926134,
 'Oklahoma City Thunder': 28.01507461471283,
 'Philadelphia 76ers': -95.17768432762955,
 'Detroit Pistons': 46.53208003387067,
 'New Orleans Pelicans': -134.3829404618266,
 'Los Angeles Clippers': 18.80496356005142,
 'Miami Heat': -24.477986690179478,
 'Toronto Raptors': 16.189899286618584,
 'Houston Rockets': 61.739911633352676,
 'Dallas Mavericks': 7.324306364136419,
 'Sacramento Kings': -48.820387255869036,
 'Atlanta Hawks': 12

In [9]:
fig = elo_delta_plot(elo_deltas)
fig.show()

In [10]:
team = 'Miami Heat'
schedule[(schedule['Visitor/Neutral'] == team) | (schedule['Home/Neutral'] == team)]

Unnamed: 0,Date,Start (ET),Visitor/Neutral,away_pts,Home/Neutral,home_pts,Box Score,OT,Attend.,LOG,Arena,Notes
4,"Wed, Oct 23, 2024",7:30p,Orlando Magic,116,Miami Heat,97,Box Score,,19630,2:31,Kaseya Center,
29,"Sat, Oct 26, 2024",7:00p,Miami Heat,114,Charlotte Hornets,106,Box Score,,19102,2:23,Spectrum Center,
45,"Mon, Oct 28, 2024",7:30p,Detroit Pistons,98,Miami Heat,106,Box Score,,19626,2:18,Kaseya Center,
61,"Wed, Oct 30, 2024",7:30p,New York Knicks,116,Miami Heat,107,Box Score,,19620,2:10,Kaseya Center,
15,"Sat, Nov 2, 2024",9:30p,Miami Heat,118,Washington Wizards,98,Box Score,,20328,2:16,Mexico City Arena,
27,"Mon, Nov 4, 2024",8:15p,Sacramento Kings,111,Miami Heat,110,Box Score,,19604,2:20,Kaseya Center,
46,"Wed, Nov 6, 2024",9:00p,Miami Heat,112,Phoenix Suns,115,Box Score,,17071,2:20,Footprint Center,
61,"Fri, Nov 8, 2024",9:00p,Miami Heat,122,Denver Nuggets,135,Box Score,,19621,2:11,Ball Arena,
73,"Sun, Nov 10, 2024",7:00p,Miami Heat,95,Minnesota Timberwolves,94,Box Score,,18978,2:28,Target Center,
86,"Tue, Nov 12, 2024",7:00p,Miami Heat,121,Detroit Pistons,123,Box Score,OT,17806,2:36,Little Caesars Arena,In-Season Tournament


In [11]:
elo_histories['Boston Celtics']

[1680.1906216232785,
 1687.5096634947834,
 1689.331437583457,
 1691.11059831677,
 1696.4201135840008,
 1682.0497237491513,
 1684.7913228479356,
 1687.459100615274,
 1691.395989549678,
 1678.9191612123145,
 1682.6464461959156,
 1687.3693335728622,
 1671.0720264570168,
 1675.006229134539,
 1677.1030490303629,
 1685.6741919366432,
 1687.3009814017894,
 1693.9744367043093,
 1700.9718011288626,
 1704.9652315881642,
 1691.9954331345477]