In [1]:
import time
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

# Initialize the Selenium driver
# A single Chrome session is created here and bound to `driver`; the scraping
# code below loads every page through it and closes it with driver.quit().
driver = webdriver.Chrome()

# Stat pages to scrape for each season; each entry is the path segment that
# follows /stats/players/ in the page URL.
data_types = [
    'touches',
    'drives',
    'shooting-efficiency',
    'offensive-rebounding',
    'defensive-rebounding',
    'defensive-impact',
]

# Example of a fully-formed page URL (touches, 2013-14 regular season)
base_url = 'https://www.nba.com/stats/players/touches?PlayerPosition=F&Season=2013-14&SeasonType=Regular+Season'

# Nested results container: season label -> {data type -> DataFrame}.
# Season labels run from '2013-14' through '2023-24'; every season starts
# with its own empty dict.
final_dfs = {
    f'{start_year}-{(start_year + 1) % 100:02d}': {}
    for start_year in range(2013, 2024)
}

# Scraping settings
MAX_PAGES = 5           # upper bound on the number of result pages read per table
PAGE_LOAD_SECONDS = 10  # fixed wait before reading the page source
NEXT_BUTTON_XPATH = '//*[@id="__next"]/div[2]/div[2]/div[3]/section[2]/div/div[2]/div[2]/div[1]/div[5]/button[2]'
# Data types whose column names are upper-cased to normalize them with the other tables
UPPERCASE_TYPES = ('touches', 'defensive-impact')


def _scrape_stat_pages(driver, url):
    """Load `url` and return every paginated table page as one DataFrame.

    Reads at most MAX_PAGES pages. After each page the "Next" button is
    clicked, unless the page limit is reached or the button is absent or
    disabled.

    Args:
        driver: Selenium WebDriver used to load and page through the table.
        url: Address of the first page of the stats table.

    Returns:
        DataFrame with the rows of all pages read, re-indexed from 0, with
        exact duplicate rows removed.

    Raises:
        ValueError: if a page contains no table with the expected CSS class.
    """
    driver.get(url)
    pages = []

    for page_number in range(MAX_PAGES):
        time.sleep(PAGE_LOAD_SECONDS)  # Wait for the page to load
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        table = soup.find('table', {'class': 'Crom_table__p1iZz'})
        if table is None:
            # Fail with a clear message instead of handing the text "None" to the parser
            raise ValueError(f'No stats table found on page {page_number + 1} of {url}')

        # read_html expects a file-like object; passing literal HTML is deprecated
        pages.append(pd.read_html(StringIO(str(table)))[0])

        if page_number == MAX_PAGES - 1:
            break  # page limit reached; no further page will be read

        # find_elements returns an empty list (rather than raising) when the
        # pagination control is not present.
        next_buttons = driver.find_elements(By.XPATH, NEXT_BUTTON_XPATH)
        if not next_buttons or not next_buttons[0].is_enabled():
            # NOTE(review): presumably the button is disabled on the last page -
            # confirm against the live site. Stopping here avoids reading the
            # same page again.
            break
        driver.execute_script("arguments[0].click();", next_buttons[0])

    # Safety net: drop rows that are exact repeats, e.g. when a click did not
    # advance the table and the same page was parsed twice.
    return pd.concat(pages, ignore_index=True).drop_duplicates(ignore_index=True)


# Scrape every (season, data type) table into final_dfs[season][data type].
# The browser is closed in `finally` so it is not left running if a page fails.
try:
    for season in final_dfs:
        for d in data_types:
            stat_df = _scrape_stat_pages(
                driver,
                f'https://www.nba.com/stats/players/{d}?PlayerPosition=F&Season={season}&SeasonType=Regular+Season',
            )

            # Capitalize the column names in 'touches' and 'defense' to normalize them with the other dataframes
            if d in UPPERCASE_TYPES:
                stat_df.columns = stat_df.columns.str.upper()

            final_dfs[season][d] = stat_df
finally:
    driver.quit()

# Order in which each season's tables are joined; the first entry is the base table
MERGE_ORDER = (
    'touches',
    'drives',
    'shooting-efficiency',
    'offensive-rebounding',
    'defensive-rebounding',
    'defensive-impact',
)


def _merge_stat_frames(frames, key='PLAYER'):
    """Join a sequence of DataFrames on `key`, keeping each column name once.

    The first frame is the base. From every later frame only the columns not
    already present in the running result are joined in, so a column shared by
    several frames keeps the values of the first frame that has it. Joins use
    pd.merge's default inner join, so only keys present in every frame remain.

    Args:
        frames: Non-empty sequence of DataFrames that each contain `key`.
        key: Name of the column to join on.

    Returns:
        The merged DataFrame.
    """
    merged = None
    for frame in frames:
        # Exact duplicate rows (e.g. a page scraped twice) would multiply
        # through every join on `key`, so remove them first.
        frame = frame.drop_duplicates()
        if merged is None:
            merged = frame
            continue
        # `key` is already in `merged`, so it is excluded here and added back explicitly
        new_columns = [col for col in frame.columns if col not in merged.columns]
        merged = pd.merge(merged, frame[[key] + new_columns], on=key)
    return merged


merged_dfs = {}

# Merge the dataframes into a singular dataframe per season
for season in final_dfs:
    merged_dfs[season] = _merge_stat_frames(
        [final_dfs[season][data_type] for data_type in MERGE_ORDER]
    )

In [3]:
# Write each entry of merged_dfs to '<key>.csv' in the current working directory
for season_label, season_frame in merged_dfs.items():
    season_frame.to_csv(f'{season_label}.csv')