In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time
import os

# Initialize the WebDriver
driver = webdriver.Chrome()

# Load existing data if available
file_path = 'nba_player_stats_last_10_years.csv'
if os.path.exists(file_path):
    df_existing = pd.read_csv(file_path)
    if 'Season' in df_existing.columns:
        existing_years = df_existing['Season'].unique()
    else:
        existing_years = []
else:
    df_existing = pd.DataFrame()
    existing_years = []

# Define the most recent year
year = 2024

if year in existing_years:
    print(f"Data for the {year} season already exists. Skipping...")
else:
    # Navigate to Basketball-Reference player stats page for the most recent year
    url = f'https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html'
    driver.get(url)
    time.sleep(2)  # Allow time for the page to load

    # Locate the table
    table = driver.find_element(By.ID, 'per_game_stats')

    # Extract data
    rows = table.find_elements(By.TAG_NAME, 'tr')
    all_data = []
    for row in rows:
        cols = row.find_elements(By.TAG_NAME, 'td')
        if cols:  # Only add rows with data
            cols = [col.text for col in cols]
            cols.append(year)  # Add the year to the data
            all_data.append(cols)

    # Create DataFrame
    columns = [header.get_attribute('aria-label') for header in table.find_elements(By.TAG_NAME, 'th') if header.get_attribute('aria-label')]
    columns.append('Season')  # Add a 'Season' column

    if all_data:
        df_new = pd.DataFrame(all_data, columns=columns[:len(all_data[0])])
        # Combine new data with existing data
        if not df_existing.empty:
            df = pd.concat([df_existing, df_new], ignore_index=True)
        else:
            df = df_new
    else:
        df = df_existing

    # Save to CSV
    df.to_csv(file_path, index=False)

# Clean up
driver.quit()

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8136 entries, 0 to 8135
Data columns (total 31 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Rk      8136 non-null   object
 1   Player  8125 non-null   object
 2   Age     8125 non-null   object
 3   Team    8125 non-null   object
 4   Pos     8125 non-null   object
 5   G       8125 non-null   object
 6   GS      8125 non-null   object
 7   MP      8125 non-null   object
 8   FG      8125 non-null   object
 9   FGA     8082 non-null   object
 10  FG%     8125 non-null   object
 11  3P      8125 non-null   object
 12  3PA     7533 non-null   object
 13  3P%     8125 non-null   object
 14  2P      8125 non-null   object
 15  2PA     8017 non-null   object
 16  2P%     8082 non-null   object
 17  eFG%    8125 non-null   object
 18  FT      8125 non-null   object
 19  FTA     7675 non-null   object
 20  FT%     8125 non-null   object
 21  ORB     8125 non-null   object
 22  DRB     8125 non-null   

In [5]:
df.head()

Unnamed: 0,Rk,Player,Age,Team,Pos,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Awards
0,Kevin Durant,25.0,OKC,SF,81.0,81.0,38.5,10.5,20.8,0.503,...,6.7,7.4,5.5,1.3,0.7,3.5,2.1,32.0,"MVP-1,AS,NBA1",2014
1,Carmelo Anthony,29.0,NYK,PF,77.0,77.0,38.7,9.6,21.3,0.452,...,6.2,8.1,3.1,1.2,0.7,2.6,2.9,27.4,"MVP-15,AS",2014
2,LeBron James,29.0,MIA,PF,77.0,77.0,37.7,10.0,17.6,0.567,...,5.9,6.9,6.3,1.6,0.3,3.5,1.6,27.1,"MVP-2,DPOY-6,AS,NBA1",2014
3,Kevin Love,25.0,MIN,PF,77.0,77.0,36.3,8.4,18.5,0.457,...,9.6,12.5,4.4,0.8,0.5,2.5,1.8,26.1,"MVP-11,AS,NBA2",2014
4,James Harden,24.0,HOU,SG,73.0,73.0,38.0,7.5,16.5,0.456,...,3.9,4.7,6.1,1.6,0.4,3.6,2.4,25.4,"MVP-5,AS,NBA1",2014


In [7]:
df.tail()

Unnamed: 0,Rk,Player,Age,Team,Pos,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Awards
8131,Ron Harper Jr.,23.0,TOR,PF,1.0,0.0,4.0,0.0,0.0,,...,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,,2024
8132,Justin Jackson,28.0,MIN,SF,2.0,0.0,0.5,0.0,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2024
8133,Dmytro Skapintsev,25.0,NYK,C,2.0,0.0,1.0,0.0,0.5,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2024
8134,Javonte Smart,24.0,PHI,PG,1.0,0.0,1.0,0.0,0.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2024
8135,League Average,,,,,,,,,0.474,...,,,,,,,,,,2024


In [None]:
df.sort()