In [1]:
# Request the espn season leaders list

import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# Send a browser-like User-Agent so the request looks like it comes from a
# regular browser and is less likely to be blocked.

# The timeout (seconds) keeps this cell from hanging indefinitely if the server
# never answers; requests applies no timeout unless one is given.
res = requests.get("https://www.espn.com/nba/seasonleaders", headers=headers, timeout=10)
# Fail fast on 4xx/5xx responses instead of parsing an error page below.
res.raise_for_status()

In [2]:
# Collect the table rows that hold players (the 2024-2025 season leaders),
# dropping the header rows of the table.

soup = BeautifulSoup(res.text, "lxml")

player_tbl = soup.find("table", {"class": "tablehead"})
if player_tbl is None:
  # find() returns None when nothing matches; stop here with a clear message
  # rather than failing later with an AttributeError on None.
  raise ValueError('Could not find a <table class="tablehead"> in the ESPN response.')

# Rows carrying either of these CSS classes are headers, not player rows.
header_classes = ("colhead", "stathead")

raw_players = player_tbl.find_all("tr")
player_li = [
  row for row in raw_players
  if not any(css in row.get("class", []) for css in header_classes)
]

In [3]:
# Pull the player names out of the table rows: every cell that contains a
# link contributes that link's text.

player_names = []
for row in player_li:
  # First <a> of each <td> in this row (None for cells without a link)
  cell_links = (cell.find("a") for cell in row.find_all("td"))
  player_names.extend(link.text for link in cell_links if link)

### This section extracts the top 50 players listed on the ESPN Season Leaders page for the 2024-2025 NBA season. We will use these players to gather **stats** and **points + assists props** to train a model that predicts whether you should bet the over or the under on a given day.

In [4]:
# Map top 50 player names to id's in nba_api

import numpy as np
import pandas as pd

from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players

# bron = players.find_players_by_full_name('Lebron James')
# print(bron)

# Resolve every scraped name to an nba_api player id.
# Ids are gathered in a temporary list and only bound to `player_ids` when
# every name was matched, so `player_ids` always has one id per entry of
# `player_names`, in the same order.
resolved_ids = []
unmatched_names = []
for name in player_names:
  matches = players.find_players_by_full_name(name)
  if matches:
    # Several players may match; keep the first hit, as before.
    resolved_ids.append(matches[0]['id'])
  else:
    unmatched_names.append(name)

if unmatched_names:
  # Report all failures at once instead of a bare IndexError on the first one.
  raise LookupError(f"No nba_api player found for: {unmatched_names}")

player_ids = resolved_ids

In [5]:
# Generate game logs for every game in the season each of the top 50 players played in

import time
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players
import pandas as pd

season_type = "Regular Season"
season_date = "2024-25"

# Collect one DataFrame per player and concatenate once at the end;
# concatenating inside the loop re-copies all previous rows on every iteration.
player_frames = []

# player_ids and player_names are parallel lists, so walk them together.
for pid, player_name in zip(player_ids, player_names):
    try:
        game_logs = playergamelog.PlayerGameLog(player_id=pid, season=season_date, season_type_all_star=season_type)
        df = game_logs.get_data_frames()[0]

        # Tag every row with the scraped display name of the player
        df['Player_Name'] = player_name

        player_frames.append(df)

        print(f"Successfully fetched {len(df)} game logs for {player_name} (ID: {pid}).")

    except Exception as e:
        # Best effort: report the failure and continue with the next player
        print(f"An error occurred with nba_api for player ID {pid}: {e}")

    time.sleep(0.3) # Sleep to avoid hitting the API too hard

# pd.concat raises on an empty list, so fall back to an empty DataFrame
if player_frames:
    all_game_logs_df = pd.concat(player_frames, ignore_index=True)
else:
    all_game_logs_df = pd.DataFrame()

print("\nCombined DataFrame Head:")
print(all_game_logs_df.head())

print(f"\nTotal game logs collected: {len(all_game_logs_df)}")

Successfully fetched 70 game logs for Nikola Jokic (ID: 203999).
Successfully fetched 67 game logs for Giannis Antetokounmpo (ID: 203507).
Successfully fetched 76 game logs for Shai Gilgeous-Alexander (ID: 1628983).
Successfully fetched 50 game logs for Luka Doncic (ID: 1629029).
Successfully fetched 46 game logs for Victor Wembanyama (ID: 1641705).
Successfully fetched 51 game logs for Anthony Davis (ID: 203076).
Successfully fetched 70 game logs for Cade Cunningham (ID: 1630595).
Successfully fetched 70 game logs for LeBron James (ID: 2544).
Successfully fetched 72 game logs for Jayson Tatum (ID: 1628369).
Successfully fetched 72 game logs for Karl-Anthony Towns (ID: 1626157).
Successfully fetched 70 game logs for Domantas Sabonis (ID: 1627734).
Successfully fetched 62 game logs for Kevin Durant (ID: 201142).
Successfully fetched 76 game logs for Trae Young (ID: 1629027).
Successfully fetched 30 game logs for Zion Williamson (ID: 1629627).
Successfully fetched 79 game logs for Anthon

In [6]:
'''
Creating SQL database to store game logs so API requests aren't
blocked
'''

import os
import sqlite3

def singleton(cls):
  """Wrap ``cls`` in a factory that builds it once and then reuses it.

  The returned callable constructs ``cls`` with the arguments of its first
  call; every later call returns that same object and its arguments are
  not used.
  """
  created = {}  # holds the single instance, keyed by the wrapped class

  def getinstance(*args, **kwargs):
    if cls in created:
      return created[cls]
    # First call: build the instance and remember it
    created[cls] = cls(*args, **kwargs)
    return created[cls]

  return getinstance

class DatabaseDriver(object):
  """SQLite-backed store for player game logs.

  Opens one connection on construction and makes sure the ``game_logs``
  table exists. Rows are keyed by (GAME_ID, PLAYER_ID).
  """

  def __init__(self, db_path='name.db'):
    """Connect to the database and create the table if it is missing.

    Args:
      db_path: Location of the SQLite database. Defaults to 'name.db';
        ':memory:' gives a throwaway in-memory database.
    """
    # check_same_thread=False allows the connection to be used from threads
    # other than the one that created it.
    self.conn = sqlite3.connect(db_path, check_same_thread = False)
    self.create_table()

  def create_table(self):
    """Create the ``game_logs`` table unless it already exists."""
    # The connection context manager commits on success and rolls back if
    # the statement raises.
    with self.conn:
      self.conn.execute('''
        CREATE TABLE IF NOT EXISTS game_logs (
          GAME_ID INTEGER,
          PLAYER_ID INTEGER,
          PLAYER_NAME TEXT,
          GAME_DATE TEXT,
          MATCHUP TEXT,
          WL TEXT,
          MIN INTEGER,
          FGM INTEGER,
          FGA INTEGER,
          FG_PCT REAL,
          FG3M INTEGER,
          FG3A INTEGER,
          FG3_PCT REAL,
          FTM INTEGER,
          FTA INTEGER,
          FT_PCT REAL,
          OREB INTEGER,
          DREB INTEGER,
          REB INTEGER,
          AST INTEGER,
          STL INTEGER,
          BLK INTEGER,
          TOV INTEGER,
          PF INTEGER,
          PTS INTEGER,
          PLUS_MINUS INTEGER,
          PRIMARY KEY (GAME_ID, PLAYER_ID)
        )
      ''')

  @staticmethod
  def _to_native(value):
    """Return ``value`` as a plain Python scalar when it can be unwrapped.

    Values that are already None/int/float/str/bytes pass through. Other
    objects exposing an ``item()`` method (NumPy scalars such as
    ``numpy.int64``) are unwrapped through it, because such objects are not
    Python ints and sqlite3 does not bind them as INTEGER values.
    """
    if value is None or isinstance(value, (int, float, str, bytes)):
      return value
    unwrap = getattr(value, 'item', None)
    return unwrap() if callable(unwrap) else value

  def insert_game_log(self, game_data):
    """Insert one game log row; an existing (GAME_ID, PLAYER_ID) is ignored.

    Args:
      game_data: Sequence of 26 values in the column order of the INSERT
        statement below (GAME_ID first, PLUS_MINUS last).
    """
    params = tuple(self._to_native(value) for value in game_data)
    # Commit on success, roll back if the insert raises.
    with self.conn:
      self.conn.execute('''INSERT OR IGNORE INTO game_logs (
          GAME_ID, PLAYER_ID, PLAYER_NAME, GAME_DATE, MATCHUP, WL, MIN, FGM, FGA, FG_PCT,
          FG3M, FG3A, FG3_PCT, FTM, FTA, FT_PCT, OREB, DREB, REB, AST, STL, BLK, TOV, PF,
          PTS, PLUS_MINUS
      )
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', params)

  def delete_table(self):
    """Placeholder: not implemented yet, calling it does nothing."""
    pass
                # self.conn.execute("sql code")

  def close(self):
    """Close the underlying SQLite connection."""
    self.conn.close()

# Rebind the name to the singleton factory: from here on DatabaseDriver()
# builds the driver on its first call and returns that same object afterwards.
DatabaseDriver = singleton(DatabaseDriver)

In [7]:
# Store every collected game log row in the SQLite database
db_driver = DatabaseDriver()

# DataFrame columns to store, in the order insert_game_log receives them
insert_columns = [
    'Game_ID', 'Player_ID', 'Player_Name', 'GAME_DATE', 'MATCHUP', 'WL',
    'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB',
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS',
]

for _, log_row in all_game_logs_df.iterrows():
    game_data = tuple(log_row[column] for column in insert_columns)
    db_driver.insert_game_log(game_data)

print("Game logs inserted into the database.")

all_game_logs_df.head()

Game logs inserted into the database.


Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Player_Name
0,22024,203999,22401193,"Apr 13, 2025",DEN @ HOU,W,31,7,10,0.7,...,7,7,2,1,1,3,18,34,1,Nikola Jokic
1,22024,203999,22401180,"Apr 11, 2025",DEN vs. MEM,W,41,11,19,0.579,...,16,13,2,0,5,3,26,11,1,Nikola Jokic
2,22024,203999,22401165,"Apr 09, 2025",DEN @ SAC,W,38,5,12,0.417,...,12,11,4,0,1,2,20,9,1,Nikola Jokic
3,22024,203999,22401142,"Apr 06, 2025",DEN vs. IND,L,39,18,33,0.545,...,15,13,2,1,4,0,41,1,1,Nikola Jokic
4,22024,203999,22401125,"Apr 04, 2025",DEN @ GSW,L,37,13,17,0.765,...,12,9,2,0,6,2,33,-4,1,Nikola Jokic


In [8]:
# Preview the first rows of the combined game-log DataFrame
all_game_logs_df.head()

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Player_Name
0,22024,203999,22401193,"Apr 13, 2025",DEN @ HOU,W,31,7,10,0.7,...,7,7,2,1,1,3,18,34,1,Nikola Jokic
1,22024,203999,22401180,"Apr 11, 2025",DEN vs. MEM,W,41,11,19,0.579,...,16,13,2,0,5,3,26,11,1,Nikola Jokic
2,22024,203999,22401165,"Apr 09, 2025",DEN @ SAC,W,38,5,12,0.417,...,12,11,4,0,1,2,20,9,1,Nikola Jokic
3,22024,203999,22401142,"Apr 06, 2025",DEN vs. IND,L,39,18,33,0.545,...,15,13,2,1,4,0,41,1,1,Nikola Jokic
4,22024,203999,22401125,"Apr 04, 2025",DEN @ GSW,L,37,13,17,0.765,...,12,9,2,0,6,2,33,-4,1,Nikola Jokic
