In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

Get player links

In [15]:
from IPython.display import clear_output
# Crawl every listing page on nbadraft.net and collect the player profile URLs.
# Results accumulate in `player_links`; pages that could not be fetched or
# parsed are recorded in `failed_pages` instead of aborting the whole crawl.
base_url = 'https://www.nbadraft.net/players/'
last_page = 390          # number of listing pages at the time of the crawl
request_timeout_s = 30   # seconds; without a timeout a stalled connection blocks forever

player_links = []
failed_pages = []

for page_num in range(1, last_page + 1):
    # The first listing page lives at the base URL; later ones under page/<n>/
    page_url = base_url if page_num == 1 else f'{base_url}page/{page_num}/'

    # Send a GET request to the page URL; a network error on one page must
    # not throw away the pages that follow.
    try:
        response = requests.get(page_url, timeout=request_timeout_s)
    except requests.RequestException:
        failed_pages.append(page_url)
        continue

    if response.status_code != 200:
        failed_pages.append(page_url)
        continue

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # parent div containing all player links; find() returns None when the
    # page layout is not the expected one
    parent_div = soup.find('div', class_='wf-container')
    if parent_div is None:
        failed_pages.append(page_url)
        continue

    # keep every anchor whose href contains the players base URL
    n = 0
    for anchor in parent_div.find_all('a'):
        href = anchor.get('href')  # .get(): anchors without an href yield None
        if href and base_url in href:
            player_links.append(href)
            n += 1
    clear_output(wait=True)
    print(f'Found {n} player links in {page_url}')

# clear_output() wipes per-page messages, so report failures once at the end
if failed_pages:
    print(f'{len(failed_pages)} listing page(s) could not be scraped; see failed_pages')

Found 24 player links in https://www.nbadraft.net/players/page/390/


Save as a txt file because that took a while and we don't want to do it again

In [16]:
# Persist the collected URLs, one per line, so the crawl need not be repeated.
with open('../data/nbadraft_player_links_20240625.txt', 'w') as file:
    file.writelines(url + "\n" for url in player_links)

Scrape player links for strengths/weaknesses

In [17]:
import numpy as np
# Reload the saved link list (one URL per line) from disk.
f_links = '../data/nbadraft_player_links_20240625.txt'

with open(f_links, 'r') as file:
    # strip the trailing newline and any surrounding whitespace from each line
    player_links = [raw_line.strip() for raw_line in file]

In [18]:
# The saved list contains duplicate URLs; keep each one once.
# np.unique also returns the values sorted.
import numpy as np
player_links = [*np.unique(player_links)]

In [20]:
from IPython.display import clear_output

# Scrape each player profile page for its strengths / weaknesses text, draft
# year, overall score and per-attribute scores, then build `df` from the rows.

REQUEST_TIMEOUT_S = 30  # seconds; without a timeout a stalled connection blocks forever
LABEL_TAGS = ('b', 'strong')  # tags tried, in order, when looking for the labels
STRENGTH_LABELS = ('Strengths:', 'Strengths: ')
WEAKNESS_LABELS = ('Weaknesses:', 'Weakness:', 'Weaknesses: ')


def _find_label(soup, tag_name, labels):
    """Return the first `tag_name` element whose text equals one of `labels`, or None."""
    for label in labels:
        found = soup.find(tag_name, text=label)  # first match on the page is used
        if found is not None:
            return found
    return None


def _find_label_pair(soup):
    """Return (strength_tag, weakness_tag) found under the same tag type, or (None, None).

    Both labels must be found with the same tag name; the tag names in
    LABEL_TAGS are tried in order and the first complete pair wins.
    """
    for tag_name in LABEL_TAGS:
        strength_tag = _find_label(soup, tag_name, STRENGTH_LABELS)
        if strength_tag is None:
            continue
        weak_tag = _find_label(soup, tag_name, WEAKNESS_LABELS)
        if weak_tag is not None:
            return strength_tag, weak_tag
    return None, None


def _label_texts(strength_tag, weak_tag):
    """Return (strength_text, weak_text) taken from what follows each label, or None.

    First tries the next text-node sibling of both labels; if either label has
    none, falls back to the text of the next element sibling of both labels.
    """
    try:
        return (strength_tag.find_next_sibling(text=True).strip(),
                weak_tag.find_next_sibling(text=True).strip())
    except AttributeError:
        pass  # find_next_sibling() returned None for one of the labels
    try:
        # The weakness text must come from the weakness label's own sibling
        # (reading it from the strengths label duplicates the strengths text).
        return (strength_tag.find_next_sibling().text.strip(),
                weak_tag.find_next_sibling().text.strip())
    except AttributeError:
        return None


data = []
missing_players = []   # players whose page could not be fetched
skipped_players = []   # (player, reason) for pages fetched but not usable

# Resume point: set to player_links.index(<link>) to restart part-way through.
ind = 0

for player_link in player_links[ind:]:
    clear_output(wait=True)
    # Last path component of the URL, with or without a trailing slash
    player_name = player_link.rstrip('/').rsplit('/', 1)[-1]

    try:
        response = requests.get(player_link, timeout=REQUEST_TIMEOUT_S)
    except requests.RequestException as exc:
        missing_players.append(player_name)
        print(f"Failed to scrape data for {player_name}. Request error: {exc}")
        continue

    if response.status_code != 200:
        missing_players.append(player_name)
        print(f"Failed to scrape data for {player_name}. Status code: {response.status_code}")
        continue

    soup = BeautifulSoup(response.content, "html.parser")

    strength_obj, weak_obj = _find_label_pair(soup)
    if strength_obj is None or weak_obj is None:
        skipped_players.append((player_name, 'no strengths/weaknesses labels'))
        print(f"Failed to scrape data for {player_name}. No weaknesses available")
        continue

    texts = _label_texts(strength_obj, weak_obj)
    if texts is None:
        skipped_players.append((player_name, 'no text after labels'))
        continue
    strength_text, weak_text = texts

    # missing (or unparseable) year is allowed
    try:
        year = int(soup.find('div', class_='mock-year').find('span', class_='label').text[:4])
    except (AttributeError, ValueError):
        year = np.nan

    # missing (or unparseable) overall attribute not allowed
    try:
        overall = int(soup.find('div', class_='overall').find('span', class_='value').text)
    except (AttributeError, ValueError):
        skipped_players.append((player_name, 'no overall score'))
        continue

    # attribute scores
    player_attr_obj = soup.find('div', class_='player-attributes')
    if player_attr_obj is None:
        skipped_players.append((player_name, 'no attribute table'))
        continue
    attr_values = player_attr_obj.find_all('div', class_='div-table-cell attribute-value')
    attr_names = player_attr_obj.find_all('div', class_='div-table-cell attribute-name')
    # don't allow missing attribute values
    try:
        attr_dict = {
            name.text.replace(' ', ''): int(value.text)
            for name, value in zip(attr_names, attr_values)
        }
    except ValueError:
        skipped_players.append((player_name, 'non-numeric attribute value'))
        continue

    # Append the data to the list
    record = {
        "player": player_name,
        "draft_year": year,
        "strengths": strength_text,
        "weaknesses": weak_text,
        'overall': overall
    }
    record.update(attr_dict)
    data.append(record)
    print(f'Success! Scraped strengths/weaknesses for {player_name}')

# Create a pandas dataframe from the scraped data
df = pd.DataFrame(data)

Failed to scrape data for zz-clark. No weaknesses available


In [21]:
# Write the scraped table to disk; the DataFrame index is written as the first column.
out_csv = '../data/nbadraft_strengths_weaknesses_20240625.csv'
df.to_csv(out_csv)

In [22]:
# Show the scraped table as this cell's output.
df

Unnamed: 0,player,draft_year,strengths,weaknesses,overall,Athleticism,Size,Defense,Strength,Quickness,Leadership,JumpShot,NBAReady,Rebounding,Potential,PostSkills,Intangibles,BallHandling,Passing
0,aaron-bradshaw,2025.0,7’0 big man … Good size and length with a repo...,Has gotten stronger over his senior year but s...,94,8,9,9,7,8,7,7,7,8.0,9,7.0,8,,
1,aaron-brooks,,Brooks is an above average athlete with great ...,Slight build and his size of 5-11 could make h...,90,8,6,6,5,9,8,9,8,,8,,8,8.0,7.0
2,aaron-craft,,Excellent on-ball defender with an incredible ...,Doesn’t have NBA measurable or athleticism. H...,84,6,7,8,8,7,8,5,7,,6,,7,7.0,8.0
3,aaron-gordon,2014.0,A freakish athlete with incredible explosivene...,Gordon’s main weaknesses revolve around his la...,92,9,8,8,7,8,8,6,8,7.0,8,6.0,9,,
4,aaron-gray,,Unlike many post players Gray doesnt put the b...,Athletically Gray is not exceptionally quick o...,89,5,9,7,9,5,8,8,8,8.0,6,8.0,8,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1466,zhaire-smith,2018.0,Has an underrated feel for the game … High bas...,"As promising as he is, he’s still a project of...",92,10,8,8,7,8,7,7,7,,9,,7,7.0,7.0
1467,zhou-qi,2016.0,"Center prospect with great size, length and mo...",Lacks overall strength and weight to be effect...,86,7,10,8,5,8,6,7,6,7.0,9,7.0,6,,
1468,ziaire-williams,2021.0,High level athlete who uses his size and verti...,Williams struggled to stay on the court for th...,95,8,9,8,7,8,7,8,7,,9,,8,8.0,8.0
1469,ziga-samar,,"Crafty point guard, who plays the game at his ...",Has some ups and downs… Average athlete… Narro...,86,7,8,7,7,7,7,7,7,,7,,7,7.0,8.0
