In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Define a function to get the urls for each power forward's personal page
def get_pf_urls(season_url):
    """Collect (name, profile URL) pairs for power-forward rows on a season page.

    Parameters
    ----------
    season_url : str
        URL of a season page containing a table with id ``advanced_stats``.

    Returns
    -------
    list[tuple[str, str]]
        One ``(player_name, absolute_profile_url)`` tuple per table row whose
        ``pos`` cell contains ``'PF'``. A player is listed once per matching
        row, so the same player can appear several times. Empty when the
        table is not present on the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. when rate-limited).
    """
    base_url = 'https://www.basketball-reference.com'

    # Fail fast on error pages instead of crashing later on a missing table,
    # and never hang forever on a stalled connection.
    response = requests.get(season_url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'lxml')
    players_table = soup.find('table', {'id': 'advanced_stats'})
    if players_table is None or players_table.tbody is None:
        print(f"No advanced stats table found for {season_url}")
        return []

    players = []
    for row in players_table.tbody.find_all('tr'):
        pos_cell = row.find('td', {'data-stat': 'pos'})
        # Rows without a 'pos' cell are skipped.
        if pos_cell is None or 'PF' not in pos_cell.text:
            continue
        player_cell = row.find('td', {'data-stat': 'player'})
        # A row without a linked player name cannot yield a profile URL.
        if player_cell is None or player_cell.a is None:
            continue
        players.append((player_cell.text, base_url + player_cell.a['href']))
    return players

# Advanced-stats pages for the two seasons being scraped.
season_2014_url, season_2024_url = (
    "https://www.basketball-reference.com/leagues/NBA_2014_advanced.html",
    "https://www.basketball-reference.com/leagues/NBA_2024_advanced.html",
)

# Scrape the power-forward (name, profile URL) pairs, one season at a time.
pf_players_2014 = get_pf_urls(season_2014_url)
pf_players_2024 = get_pf_urls(season_2024_url)

In [2]:
# Inspect the (player name, profile URL) pairs scraped from the 2014 season page.
# get_pf_urls appends one entry per matching table row, so names can repeat.
pf_players_2014

[('Jeff Adrien',
  'https://www.basketball-reference.com/players/a/adrieje01.html'),
 ('Jeff Adrien',
  'https://www.basketball-reference.com/players/a/adrieje01.html'),
 ('Jeff Adrien',
  'https://www.basketball-reference.com/players/a/adrieje01.html'),
 ('LaMarcus Aldridge',
  'https://www.basketball-reference.com/players/a/aldrila01.html'),
 ('Lavoy Allen',
  'https://www.basketball-reference.com/players/a/allenla01.html'),
 ('Lavoy Allen',
  'https://www.basketball-reference.com/players/a/allenla01.html'),
 ('Lavoy Allen',
  'https://www.basketball-reference.com/players/a/allenla01.html'),
 ('Lou Amundson',
  'https://www.basketball-reference.com/players/a/amundlo01.html'),
 ('Lou Amundson',
  'https://www.basketball-reference.com/players/a/amundlo01.html'),
 ('Lou Amundson',
  'https://www.basketball-reference.com/players/a/amundlo01.html'),
 ('Ryan Anderson',
  'https://www.basketball-reference.com/players/a/anderry01.html'),
 ('Carmelo Anthony',
  'https://www.basketball-referen

In [12]:
from selenium import webdriver
from bs4 import BeautifulSoup
import time

# Descriptive names written over positions 8-21 of the blank-filtered header
# list, in order.
_SHOOTING_SPLIT_COLUMNS = (
    '% of FGA by Distance: 2P',
    '% of FGA by Distance: 0-3ft',
    '% of FGA by Distance: 3-10ft',
    '% of FGA by Distance: 10-16ft',
    '% of FGA by Distance: 16-3P',
    '% of FGA by Distance: 3P',
    'FG% by Distance: 2P',
    'FG% by Distance: 0-3ft',
    'FG% by Distance: 3-10ft',
    'FG% by Distance: 10-16ft',
    'FG% by Distance: 16-3P',
    'FG% by Distance: 3P',
    "% of FG Ast'd: 2P",
    "% of FG Ast'd: 3P",
)


def _render_page(player_url, page_load_wait):
    """Load ``player_url`` in Chrome and return the rendered page as soup."""
    driver = webdriver.Chrome()  # Requires ChromeDriver to be installed and on PATH
    try:
        driver.get(player_url)
        # Fixed delay to let the page finish loading before reading it.
        time.sleep(page_load_wait)
        return BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # Always release the browser, even if loading or parsing fails.
        driver.quit()


def _shooting_headers(shooting_table):
    """Build the output column names (ending in "Player") from the table head."""
    scraped = [th.text.strip() for th in shooting_table.find('thead').find_all('th')]
    # NOTE(review): the 14:39 window presumably skips the over-header cells and
    # the season column - confirm against the live table layout.
    # Filter blanks with a comprehension: removing items from a list while
    # iterating over it skips the element after each removal.
    headers = [header for header in scraped[14:39] if header != '']
    for position, column_name in enumerate(_SHOOTING_SPLIT_COLUMNS, start=8):
        headers[position] = column_name
    headers.append("Player")
    return headers


def get_shooting_data(player_url, season, player_name, page_load_wait=10):
    """Scrape one season's shooting row from a player's page.

    Parameters
    ----------
    player_url : str
        URL of the player's page.
    season : str
        Season label to match against the row's season cell, e.g. ``'2013-14'``.
    player_name : str
        Value stored in the ``Player`` index of the returned frame.
    page_load_wait : int or float, optional
        Seconds to sleep after opening the page (default 10).

    Returns
    -------
    pandas.DataFrame
        A single-row frame indexed by ``Player`` built from the first body row
        whose season cell equals ``season``. An empty frame is returned when
        no shooting table or no matching row is found, so the result can
        always be handled as a DataFrame. Cell values are kept as the stripped
        text scraped from the page.
    """
    soup = _render_page(player_url, page_load_wait)

    # The table is looked up under each of these ids; first hit wins.
    shooting_table = None
    for table_id in ('shooting_sh', 'div_shooting', 'shooting'):
        shooting_table = soup.find('table', {'id': table_id})
        if shooting_table is not None:
            break

    if shooting_table is None:
        print(f"No shooting data found for {player_url}")
        return pd.DataFrame()

    headers = _shooting_headers(shooting_table)

    # Return the first row for the requested season, in table order.
    for row in shooting_table.tbody.find_all('tr'):
        season_cell = row.find('th', {'data-stat': 'season'})
        # Body rows without a season cell are skipped.
        if season_cell is None or season_cell.text.strip() != season:
            continue

        cells = [td.text.strip() for td in row.find_all('td')][:25]
        # Drop the cells at positions 8, 15 and 22 (highest first so earlier
        # indexes stay valid); they line up with the blank headers removed above.
        for spacer_index in (22, 15, 8):
            del cells[spacer_index]
        cells.append(player_name)
        return pd.DataFrame([cells], columns=headers).set_index("Player")

    return pd.DataFrame()

# Scrape each player's 2013-14 shooting row once; pf_players_2014 can list the
# same player several times, so names that were already fetched are skipped.
shooting_data_dfs = {}
for player_name, player_url in pf_players_2014:
    if player_name in shooting_data_dfs:
        continue
    shooting_data_dfs[player_name] = get_shooting_data(player_url, '2013-14', player_name)

# Only concatenate real, non-empty frames: pd.concat rejects non-DataFrame
# values and an empty collection, and mixing in empty frames (whose index has
# no name) would drop the "Player" index name from the result.
shooting_frames = [
    frame
    for frame in shooting_data_dfs.values()
    if isinstance(frame, pd.DataFrame) and not frame.empty
]
shooting_data = pd.concat(shooting_frames) if shooting_frames else pd.DataFrame()
shooting_data

Unnamed: 0_level_0,Age,Tm,Lg,Pos,G,MP,FG%,Dist.,% of FGA by Distance: 2P,% of FGA by Distance: 0-3ft,...,% of FGA by Distance: 16-3P,% of FGA by Distance: 3P,FG% by Distance: 2P,FG% by Distance: 0-3ft,FG% by Distance: 3-10ft,FG% by Distance: 10-16ft,FG% by Distance: 16-3P,FG% by Distance: 3P,% of FG Ast'd: 2P,% of FG Ast'd: 3P
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Jeff Adrien,27,TOT,NBA,PF,53,961,.520,5.5,1.000,.560,...,.120,.000,.520,.617,.426,.333,.394,,.643,
LaMarcus Aldridge,28,POR,NBA,PF,69,2498,.458,12.5,.989,.186,...,.415,.011,.461,.675,.364,.388,.442,.200,.599,.667
Lavoy Allen,24,TOT,NBA,PF,65,1072,.447,9.7,.957,.290,...,.257,.043,.460,.563,.531,.286,.364,.154,.697,1.000
Lou Amundson,31,TOT,NBA,PF,19,185,.500,4.1,1.000,.625,...,.031,.000,.500,.650,.333,.000,.000,,.750,
Ryan Anderson,25,NOP,NBA,PF,22,795,.438,16.8,.537,.198,...,.155,.463,.463,.529,.333,.344,.527,.409,.420,.985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Royce White,22,SAC,NBA,PF,3,9,.000,14.1,1.000,.000,...,.000,.000,.000,,,.000,,,,
Derrick Williams,22,TOT,NBA,"PF,SF",78,1820,.427,10.8,.795,.334,...,.135,.205,.470,.658,.333,.354,.323,.263,.600,.846
Marvin Williams,27,UTA,NBA,PF,66,1674,.439,16.6,.555,.194,...,.179,.445,.503,.725,.406,.344,.383,.359,.605,.988
Shawne Williams,27,LAL,NBA,PF,36,751,.380,19.4,.328,.151,...,.120,.672,.492,.483,.250,.667,.565,.326,.742,.952
