In [72]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd

In [103]:
## 2024 Strength Data

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

url1 = "https://www.nba.com/stats/draft/combine-strength-agility"
driver.get(url1)

html = driver.page_source
soup = BeautifulSoup(html)
table = soup.find('table')
rows = table.find_all('tr')
table_data1 = []
headers1 = [header.text.strip() for header in rows[0].find_all('th')] if rows else []
for row in rows[1:]:
    columns = row.find_all('td')
    if columns:
        data = [col.text.strip() for col in columns]
        table_data1.append(data)

strength_2024 = pd.DataFrame(table_data1, columns=headers1)
strength_2024['Year'] = 2024

driver.quit()

In [104]:
## 2023 Strength Data

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

url2 = "https://www.nba.com/stats/draft/combine-strength-agility?SeasonYear=2023-24"
driver.get(url2)

html = driver.page_source
soup = BeautifulSoup(html)
table = soup.find('table')
rows = table.find_all('tr')
table_data2 = []
headers2 = [header.text.strip() for header in rows[0].find_all('th')] if rows else []
for row in rows[1:]:
    columns = row.find_all('td')
    if columns:
        data = [col.text.strip() for col in columns]
        table_data2.append(data)

strength_2023 = pd.DataFrame(table_data2, columns=headers2)
strength_2023['Year'] = 2023

driver.quit()

In [105]:
## 2022 Strength Data

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

url3 = "https://www.nba.com/stats/draft/combine-strength-agility?SeasonYear=2022-23"
driver.get(url3)

html = driver.page_source
soup = BeautifulSoup(html)
table = soup.find('table')
rows = table.find_all('tr')
table_data3 = []
headers3 = [header.text.strip() for header in rows[0].find_all('th')] if rows else []
for row in rows[1:]:
    columns = row.find_all('td')
    if columns:
        data = [col.text.strip() for col in columns]
        table_data3.append(data)

strength_2022 = pd.DataFrame(table_data3, columns=headers3)
strength_2022['Year'] = 2022

driver.quit()

In [106]:
## Combine Strength Data 2022-2024
strength_data = pd.concat([strength_2024, strength_2023.iloc[1:], strength_2022.iloc[1:]])
strength_data.rename(columns={'PLAYER':'Player'}, inplace=True)
strength_data = strength_data.drop('Max Bench Press (repetitions)', axis=1)
len(strength_data)

strength_data['Lane Agility Time (seconds)'] = pd.to_numeric(strength_data['Lane Agility Time (seconds)'], errors='coerce')
strength_data['Shuttle Run (seconds)'] = pd.to_numeric(strength_data['Shuttle Run (seconds)'], errors='coerce')
strength_data['Three Quarter Sprint (seconds)'] = pd.to_numeric(strength_data['Three Quarter Sprint (seconds)'], errors='coerce')
strength_data['Standing Vertical Leap (inches)'] = pd.to_numeric(strength_data['Standing Vertical Leap (inches)'], errors='coerce')
strength_data['Max Vertical Leap (inches)'] = pd.to_numeric(strength_data['Max Vertical Leap (inches)'], errors='coerce')

print(strength_data.dtypes)

Player                              object
POS                                 object
Lane Agility Time (seconds)        float64
Shuttle Run (seconds)              float64
Three Quarter Sprint (seconds)     float64
Standing Vertical Leap (inches)    float64
Max Vertical Leap (inches)         float64
Year                                 int64
dtype: object


In [107]:
## 2024 Draft History 
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

url4 = "https://www.nba.com/stats/draft/history?Season=2024"
driver.get(url4)

all_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//*[text()="All"]'))
)
all_button.click()

page_source = driver.page_source
soup = BeautifulSoup(page_source)


table = soup.find('table')
rows = table.find_all('tr')
table_data4 = []
headers4 = [header.text.strip() for header in rows[0].find_all('th')] if rows else []
for row in rows[1:]:
    columns = row.find_all('td')
    if columns:
        data = []
        for i, col in enumerate(columns):
            if i < 2:
                a_tag = col.find('a')
                if a_tag:
                    data.append(a_tag.text.strip())
                else:
                    data.append(col.text.strip())
            else:
                data.append(col.text.strip())
        while len(data) < len(headers4):
            data.append(None)
        if len(data) == len(headers4):
            table_data4.append(data)


draft_2024 = pd.DataFrame(table_data4, columns=headers4)
print(draft_2024)

driver.quit()

                  Player                    Team                 Affiliation  \
0     Zaccharie Risacher           Atlanta Hawks           JL Bourg (France)   
1              Alex Sarr      Washington Wizards  Perth Wildcats (Australia)   
2          Reed Sheppard         Houston Rockets                    Kentucky   
3         Stephon Castle       San Antonio Spurs                 Connecticut   
4      Ronald Holland II         Detroit Pistons           Ignite (G League)   
5         Tidjane Salaün       Charlotte Hornets      Cholet Basket (France)   
6        Donovan Clingan  Portland Trail Blazers                 Connecticut   
7         Rob Dillingham       San Antonio Spurs                    Kentucky   
8              Zach Edey       Memphis Grizzlies                      Purdue   
9          Cody Williams               Utah Jazz                    Colorado   
10         Matas Buzelis           Chicago Bulls           Ignite (G League)   
11          Nikola Topić   Oklahoma City

In [108]:
## 2023 Draft History 
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

url5 = "https://www.nba.com/stats/draft/history?Season=2023"
driver.get(url5)

all_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//*[text()="All"]'))
)
all_button.click()

page_source = driver.page_source
soup = BeautifulSoup(page_source)


table = soup.find('table')
rows = table.find_all('tr')
table_data5 = []
headers5 = [header.text.strip() for header in rows[0].find_all('th')] if rows else []
for row in rows[1:]:
    columns = row.find_all('td')
    if columns:
        data = []
        for i, col in enumerate(columns):
            if i < 2:
                a_tag = col.find('a')
                if a_tag:
                    data.append(a_tag.text.strip())
                else:
                    data.append(col.text.strip())
            else:
                data.append(col.text.strip())
        while len(data) < len(headers5):
            data.append(None)
        if len(data) == len(headers5):
            table_data5.append(data)


draft_2023 = pd.DataFrame(table_data5, columns=headers5)
print(draft_2023)

driver.quit()

                     Player                    Team  \
0         Victor Wembanyama       San Antonio Spurs   
1            Brandon Miller       Charlotte Hornets   
2           Scoot Henderson  Portland Trail Blazers   
3             Amen Thompson         Houston Rockets   
4            Ausar Thompson         Detroit Pistons   
5             Anthony Black           Orlando Magic   
6           Bilal Coulibaly          Indiana Pacers   
7             Jarace Walker      Washington Wizards   
8          Taylor Hendricks               Utah Jazz   
9             Cason Wallace        Dallas Mavericks   
10              Jett Howard           Orlando Magic   
11         Dereck Lively II   Oklahoma City Thunder   
12              Gradey Dick         Toronto Raptors   
13           Jordan Hawkins    New Orleans Pelicans   
14              Kobe Bufkin           Atlanta Hawks   
15           Keyonte George               Utah Jazz   
16      Jalen Hood-Schifino      Los Angeles Lakers   
17        

In [109]:
## 2022 Draft History 
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

url6 = "https://www.nba.com/stats/draft/history?Season=2022"
driver.get(url6)

all_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '//*[text()="All"]'))
)
all_button.click()

page_source = driver.page_source
soup = BeautifulSoup(page_source)


table = soup.find('table')
rows = table.find_all('tr')
table_data6 = []
headers6 = [header.text.strip() for header in rows[0].find_all('th')] if rows else []
for row in rows[1:]:
    columns = row.find_all('td')
    if columns:
        data = []
        for i, col in enumerate(columns):
            if i < 2:
                a_tag = col.find('a')
                if a_tag:
                    data.append(a_tag.text.strip())
                else:
                    data.append(col.text.strip())
            else:
                data.append(col.text.strip())
        while len(data) < len(headers6):
            data.append(None)
        if len(data) == len(headers6):
            table_data6.append(data)


draft_2022 = pd.DataFrame(table_data6, columns=headers5)
print(draft_2022)

driver.quit()

                 Player                    Team  \
0        Paolo Banchero           Orlando Magic   
1         Chet Holmgren   Oklahoma City Thunder   
2      Jabari Smith Jr.         Houston Rockets   
3         Keegan Murray        Sacramento Kings   
4            Jaden Ivey         Detroit Pistons   
5    Bennedict Mathurin          Indiana Pacers   
6        Shaedon Sharpe  Portland Trail Blazers   
7         Dyson Daniels    New Orleans Pelicans   
8         Jeremy Sochan       San Antonio Spurs   
9          Johnny Davis      Washington Wizards   
10        Ousmane Dieng         New York Knicks   
11       Jalen Williams   Oklahoma City Thunder   
12          Jalen Duren       Charlotte Hornets   
13         Ochai Agbaji     Cleveland Cavaliers   
14        Mark Williams       Charlotte Hornets   
15           AJ Griffin           Atlanta Hawks   
16           Tari Eason         Houston Rockets   
17          Dalen Terry           Chicago Bulls   
18         Jake LaRavia  Minnes

In [110]:
## Draft History Data 2022-2024
draft_data = pd.concat([draft_2024, draft_2023.iloc[1:], draft_2022.iloc[1:]])


In [111]:
draft_data['Year'] = draft_data['Year'].astype(int)
draft_data['RoundNumber'] = pd.to_numeric(draft_data['RoundNumber'], errors='coerce')
draft_data['RoundPick'] = pd.to_numeric(draft_data['RoundPick'], errors='coerce')
draft_data['OverallPick'] = pd.to_numeric(draft_data['OverallPick'], errors='coerce')
len(draft_data)

print(draft_data.dtypes)

Player         object
Team           object
Affiliation    object
Year            int64
RoundNumber     int64
RoundPick       int64
OverallPick     int64
dtype: object


In [112]:
print(strength_data['Year'].dtype)
print(draft_data['Year'].dtype)

int64
int64


In [113]:
merged = pd.merge(strength_data, draft_data, how = 'left', on = ["Player", "Year"])
print(merged)
len(merged)

merged.to_csv('project_data.csv', index=False)

               Player    POS  Lane Agility Time (seconds)  \
0       Michael Ajayi     SF                        11.77   
1       Melvin Ajinca     SF                        11.78   
2      Trey Alexander     SG                        11.37   
3        Izan Almansa      C                        10.94   
4       Reece Beekman     PG                        10.86   
..                ...    ...                          ...   
240  Trevion Williams      C                        12.75   
241   Bryson Williams   PF-C                          NaN   
242     Mark Williams      C                          NaN   
243      Jalen Wilson  SF-PF                          NaN   
244        Fanbo Zeng  SF-PF                          NaN   

     Shuttle Run (seconds)  Three Quarter Sprint (seconds)  \
0                     3.17                            3.21   
1                     3.00                            3.38   
2                     2.92                            3.21   
3                  

In [114]:
print(merged.dtypes)

Player                              object
POS                                 object
Lane Agility Time (seconds)        float64
Shuttle Run (seconds)              float64
Three Quarter Sprint (seconds)     float64
Standing Vertical Leap (inches)    float64
Max Vertical Leap (inches)         float64
Year                                 int64
Team                                object
Affiliation                         object
RoundNumber                        float64
RoundPick                          float64
OverallPick                        float64
dtype: object
