### Web Scraping NBA Stats for MVP Prediction (2024-2025 Regular Season)

##### Downloading Player Stats through Selenium
- This is to scrape a javascript page.

In [1]:
import requests

player_stats_url = "https://www.basketball-reference.com/leagues/NBA_2025_per_game.html"
url = player_stats_url.format(2025)
data = requests.get(url)

In [2]:
# Importing webdriver from selenium then import service
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Define the path to the chromedriver
chrome_driver_path = "C:/chromedriver-win64/chromedriver.exe"

# Create a Service object
service = Service(chrome_driver_path)

# Initialize the Chrome driver with the Service object
driver = webdriver.Chrome(service=service)

In [3]:
import time 

year = 2025
url = player_stats_url.format(year)

driver.get(url)
driver.execute_script("window.scrollTo(1,10000)")
time.sleep(2)

html = driver.page_source
with open("player/2025.html".format(year), "w+", encoding="utf-8") as f:
    f.write(html)

##### Parsing the Stats with BeautifulSoup

In [6]:
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO

# Initialize an empty DataFrame
df = pd.DataFrame()

# Parsing players stats with BeautifulSoup
with open("player/2025.html".format(year), encoding="utf-8") as f:
    page = f.read()

soup = BeautifulSoup(page, "html.parser")
soup.find('tr', class_="thead").decompose()
soup.find('tr', class_="norank").decompose()
player_table = soup.find(id="per_game_stats")

# Wrap the HTML string in StringIO object
player = pd.read_html(StringIO(str(player_table)))[0]
player["Year"] = year

# Append the player DataFrame to dfs
df = pd.concat([df, player], ignore_index=True)

In [7]:
players = df
players

Unnamed: 0,Rk,Player,Age,Team,Pos,G,GS,MP,FG,FGA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Awards,Year
0,1,Shai Gilgeous-Alexander,26,OKC,PG,76,76,34.2,11.3,21.8,...,4.1,5.0,6.4,1.7,1.0,2.4,2.2,32.7,,2025
1,2,Giannis Antetokounmpo,30,MIL,PF,67,67,34.2,11.8,19.7,...,9.7,11.9,6.5,0.9,1.2,3.1,2.3,30.4,,2025
2,3,Nikola Jokić,29,DEN,C,70,70,36.7,11.2,19.5,...,9.9,12.7,10.2,1.8,0.6,3.3,2.3,29.6,,2025
3,4,Luka Dončić,25,2TM,PG,50,50,35.4,9.2,20.5,...,7.4,8.2,7.7,1.8,0.4,3.6,2.5,28.2,,2025
4,4,Luka Dončić,25,DAL,PG,22,22,35.7,9.8,21.2,...,7.6,8.3,7.8,2.0,0.4,3.4,2.6,28.1,,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753,565,Riley Minix,24,SAS,SF,1,0,7.0,0.0,1.0,...,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,,2025
754,566,Jahlil Okafor,29,IND,C,1,0,3.0,0.0,0.0,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,,2025
755,567,Zyon Pullin,23,MEM,SG,3,0,1.0,0.0,0.3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2025
756,568,Isaiah Stevens,24,MIA,PG,3,0,2.0,0.0,0.7,...,0.7,0.7,0.0,0.3,0.0,0.0,0.0,0.0,,2025


In [8]:
# Storing player stats to CSV file
players.to_csv("[2024-2025]_players.csv")

##### Downloading Team Data

In [9]:
# Assigning URL to team stats table
team_stats_url = "https://www.basketball-reference.com/leagues/NBA_2025_standings.html"

In [11]:
# Scraping team data and then store to "team" folder
url = team_stats_url.format(year)
data = requests.get(url)
with open("team/2025.html".format(year), "w+", encoding="utf-8") as f:
    f.write(data.text)

##### Parsing the Team Data with BeautifulSoup

In [12]:
# Initialize an empty DataFrame
df = pd.DataFrame()

# Parsing team stats with BeautifulSoup
with open("team/2025.html".format(year), encoding="utf-8") as f: 
    page = f.read()

# Parsing the Eastern Conference table standings
soup = BeautifulSoup(page, "html.parser")
soup.find('tr', class_="thead").decompose()
team_table = soup.find(id="divs_standings_E")
team = pd.read_html(StringIO(str(team_table)))[0] # Wrap the HTML string in StringIO object
team["Year"] = year
team["Team"] = team["Eastern Conference"]
del team["Eastern Conference"]
df = pd.concat([df, team], ignore_index=True) # Append the team DataFrame to dfs

# Parsing the Western Conference table standings
soup = BeautifulSoup(page, "html.parser")
soup.find('tr', class_="thead").decompose()
team_table = soup.find(id="divs_standings_W")
team = pd.read_html(StringIO(str(team_table)))[0] # Wrap the HTML string in StringIO object
team["Year"] = year
team["Team"] = team["Western Conference"]
del team["Western Conference"]
df = pd.concat([df, team], ignore_index=True) # Append the team DataFrame to dfs

In [13]:
teams = df
teams

Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,61,21,.744,—,116.3,107.2,8.28,2025,Boston Celtics* (2)
1,51,31,.622,10.0,115.8,111.7,3.59,2025,New York Knicks* (3)
2,30,52,.366,31.0,110.9,115.2,-4.40,2025,Toronto Raptors (11)
3,26,56,.317,35.0,105.1,112.2,-6.95,2025,Brooklyn Nets (12)
4,24,58,.293,37.0,109.6,115.8,-6.29,2025,Philadelphia 76ers (13)
5,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,2025,Central Division
6,64,18,.780,—,121.9,112.4,8.81,2025,Cleveland Cavaliers* (1)
7,50,32,.610,14.0,117.4,115.1,1.68,2025,Indiana Pacers* (4)
8,48,34,.585,16.0,115.5,113.0,2.12,2025,Milwaukee Bucks* (5)
9,44,38,.537,20.0,115.5,113.6,1.73,2025,Detroit Pistons* (6)


In [14]:
# Storing team stats to CSV file
teams.to_csv("[2024-2025]_teams.csv")