In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import pickle
from datetime import datetime

# plotting
import plotly.graph_objects as go 
import ast


 First step - grab the schedule! Each month's schedule is stored on a separate page, so we'll check each one based on the current date.

In [2]:
def get_schedule():

    months = ['october','november','december','january','february','march','april']
    current_month = datetime.now().strftime("%B").lower()
    
    schedule = pd.DataFrame()

    for month in months:
        url = f"https://www.basketball-reference.com/leagues/NBA_2025_games-{month}.html"

        # Add headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        try:
            # Make the request with a slight delay to be respectful to the server
            time.sleep(3)
            response = requests.get(url, headers=headers)
            response.raise_for_status()  # Raise an exception for bad status codes
            
            # Parse the HTML content
            soup = BeautifulSoup(response.text, 'html.parser')
            
            # Find the schedule table
            table = soup.find('table', id='schedule')
            
            if table is None:
                raise ValueError("Schedule table not found on the page")

            # Define the columns we want to extract
            columns = [
                'Date',
                'Start (ET)',
                'Visitor/Neutral',
                'away_pts',
                'Home/Neutral',
                'home_pts',
                'Box Score',
                'OT',
                'Attend.',
                'LOG',
                'Arena',
                'Notes'
            ]
            
            # Extract rows
            rows = []
            for row in table.find_all('tr')[1:]:  # Skip header row
                game_data = []
                cells = row.find_all(['td', 'th'])
                if cells:  # Only process rows with data
                    for cell in cells:
                        text = cell.text.strip()
                        game_data.append(text)
                    if game_data:  # Only append non-empty rows
                        rows.append(game_data)
            
            # Create DataFrame with explicit column names
            df = pd.DataFrame(rows, columns=columns)
            df = df[df['away_pts'] != '']
            schedule = pd.concat((schedule, df))
            
            if month == current_month:
                break
                



        except requests.RequestException as e:
            print(f"Error fetching the webpage: {e}")
        except Exception as e:
            print(f"Error processing the data: {e}")

    # Optionally save to CSV
    schedule.to_csv('2025_schedule.csv', index=False)
    print("\nData has been saved to '2025_schedule.csv'")
    return schedule

schedule = get_schedule()


Data has been saved to '2025_schedule.csv'


Now we calculate the Elo of each team game-by-game:

In [3]:
def expected_score(rating_a, rating_b):
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(rating, expected, actual, k=20):
    """
    Update the Elo rating based on game result.
    :param rating: Current Elo rating
    :param expected: Expected score
    :param actual: Actual score (1 for win, 0 for loss)
    :param k: K-factor
    :return: Updated Elo rating
    """
    return rating + k * (actual - expected)


# Load Elos from end of previous season
with open('../2023-24/final_elos.pkl', 'rb') as f:
    current_elos = pickle.load(f) 

elo_histories = {}
team_names = list(schedule['Visitor/Neutral'].unique())
for name in team_names:
    elo_histories[name] = [current_elos[name]]

In [4]:
def calculate_elos(schedule, current_elos, elo_histories):
    '''
    schedule: pd.Dataframe containing game logs
    
    current_elos: {str: int} form dictionary showing each team's current Elo
    
    elo_histories: {str: [int]} form dictionary showing all Elo values throughout the season 
    (used for plotting later on)
    '''
    for index, row in schedule.iterrows(): # i know iterrows sucks and vectors are better,
                                    # but it's sequential data that's updated over time 
        away = row['Visitor/Neutral']
        away_score = row['away_pts']
        home = row['Home/Neutral']
        home_score = row['home_pts']

        home_elo = current_elos[home]
        away_elo = current_elos[away]

        if home_score > away_score:
            result = 1
        else:
            result = 0
        
        expected_home_score = expected_score(current_elos[home], current_elos[away])
        expected_away_score = expected_score(current_elos[away], current_elos[home])
        new_rating_home = update_elo(home_elo, expected_home_score, result)
        new_rating_away = update_elo(away_elo, expected_away_score, 1 - result)

        elo_histories[home].append(new_rating_home)
        elo_histories[away].append(new_rating_away)

        current_elos[home] = new_rating_home
        current_elos[away] = new_rating_away
    
    return current_elos, elo_histories

In [5]:
current_elos, elo_histories = calculate_elos(schedule, current_elos, elo_histories)
elo_histories

{'New York Knicks': [1584.711486893246,
  1577.3924450217412,
  1567.0328727564695,
  1556.0129470825818,
  1565.4478248023324,
  1548.9391112150938,
  1557.667426174039,
  1544.8710582410674],
 'Minnesota Timberwolves': [1607.4606388711322,
  1596.7761947867077,
  1604.835233202691,
  1608.3770782156566,
  1598.1363097166347,
  1609.3321130835805,
  1594.0294732269074,
  1578.1088571695577,
  1585.6577463190756],
 'Indiana Pacers': [1561.380655075586,
  1564.894236707712,
  1575.2538089729837,
  1563.4025361748818,
  1551.965175740892,
  1565.8988179995022,
  1555.9650349794993,
  1566.8025999560234,
  1576.2748268289931],
 'Brooklyn Nets': [1410.8879283939752,
  1402.1655332295607,
  1395.444329864287,
  1409.0832614728583,
  1404.8134373273097,
  1414.2405224144347,
  1426.7471921467818,
  1433.5612599341264,
  1442.1264635239777],
 'Orlando Magic': [1530.2250287664333,
  1520.4502066503037,
  1527.1714100155773,
  1513.1152784696637,
  1524.5526389036536,
  1534.2171373245642,
  15

In [6]:
from plotting import elo_bar_plot, elo_line_plot, elo_delta_plot

fig = elo_bar_plot(current_elos)
fig.show()

In [7]:
fig = elo_line_plot(elo_histories)
fig.show()

In [8]:
elo_deltas = {name: 0 for name in team_names}

for name in team_names:
    history = elo_histories[name]
    delta = history[-1] - history[0]
    elo_deltas[name] = delta
    
elo_deltas

{'New York Knicks': -39.840428652178616,
 'Minnesota Timberwolves': -21.80289255205662,
 'Indiana Pacers': 14.894171753407136,
 'Brooklyn Nets': 31.238535130002447,
 'Orlando Magic': 8.751573961247914,
 'Milwaukee Bucks': -22.979011933900665,
 'Cleveland Cavaliers': 71.12845071452534,
 'Charlotte Hornets': 30.483031851244505,
 'Chicago Bulls': -4.7636741389135295,
 'Memphis Grizzlies': 16.140802993853413,
 'Phoenix Suns': 24.886703180247423,
 'Golden State Warriors': 4.912787916210391,
 'Boston Celtics': -21.31407798282953,
 'San Antonio Spurs': 17.37645814595612,
 'Oklahoma City Thunder': -60.23162809834503,
 'Philadelphia 76ers': -18.85942514310068,
 'Detroit Pistons': 20.699427349685948,
 'New Orleans Pelicans': -33.40993933035429,
 'Los Angeles Clippers': -4.84436569723357,
 'Miami Heat': -44.329010130980805,
 'Toronto Raptors': -3.278253521401666,
 'Houston Rockets': -9.915239834924023,
 'Dallas Mavericks': -39.00127183312975,
 'Sacramento Kings': -32.69982935239295,
 'Atlanta Haw

In [9]:
fig = elo_delta_plot(elo_deltas)
fig.show()

In [12]:
team = 'Boston Celtics'
schedule[(schedule['Visitor/Neutral'] == team) | (schedule['Home/Neutral'] == team)]

Unnamed: 0,Date,Start (ET),Visitor/Neutral,away_pts,Home/Neutral,home_pts,Box Score,OT,Attend.,LOG,Arena,Notes
0,"Tue, Oct 22, 2024",7:30p,New York Knicks,109,Boston Celtics,132,Box Score,,19156,2:04,TD Garden,
12,"Thu, Oct 24, 2024",7:00p,Boston Celtics,122,Washington Wizards,102,Box Score,,18610,2:11,Capital One Arena,
28,"Sat, Oct 26, 2024",7:00p,Boston Celtics,124,Detroit Pistons,118,Box Score,,19311,2:00,Little Caesars Arena,
43,"Mon, Oct 28, 2024",7:30p,Milwaukee Bucks,108,Boston Celtics,119,Box Score,,19156,2:18,TD Garden,
58,"Wed, Oct 30, 2024",7:00p,Boston Celtics,132,Indiana Pacers,135,Box Score,OT,17274,2:37,Gainbridge Fieldhouse,
0,"Fri, Nov 1, 2024",7:00p,Boston Celtics,124,Charlotte Hornets,109,Box Score,,18557,2:17,Spectrum Center,
9,"Sat, Nov 2, 2024",6:00p,Boston Celtics,113,Charlotte Hornets,103,Box Score,,19253,2:09,Spectrum Center,
25,"Mon, Nov 4, 2024",7:45p,Boston Celtics,123,Atlanta Hawks,93,Box Score,,15031,1:56,State Farm Arena,
40,"Wed, Nov 6, 2024",7:30p,Golden State Warriors,118,Boston Celtics,112,Box Score,,19156,2:21,TD Garden,


In [13]:
elo_histories['Boston Celtics']

[1680.1906216232785,
 1687.5096634947834,
 1689.331437583457,
 1691.11059831677,
 1696.4198310669194,
 1682.4861888083092,
 1685.227860493281,
 1687.8957072623814,
 1671.7986924846837,
 1658.876543640449]