In [2]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import PlayerDashboardByYearOverYear
import pandas as pd
import time
import os

csv_path = "nba_season_stats_all_players.csv"

# Load existing CSV or create empty DataFrame
if os.path.exists(csv_path):
    existing_df = pd.read_csv(csv_path, encoding='utf-8-sig')
    print(f"🔁 Loaded existing data: {len(existing_df)} rows")
else:
    existing_df = pd.DataFrame()
    print("🆕 No existing CSV found. Starting fresh.")

# Map player names to IDs using nba_api static player list
player_list = players.get_active_players()
player_map = {p['full_name']: p['id'] for p in player_list}

# Add PLAYER_ID column if missing
if 'PLAYER_ID' not in existing_df.columns:
    print("Adding PLAYER_ID column to existing data...")
    existing_df['PLAYER_ID'] = existing_df['PLAYER_NAME'].map(player_map)

# Create set of fetched player IDs to avoid re-fetching
fetched_ids = set(existing_df['PLAYER_ID'].dropna().astype(int).unique()) if not existing_df.empty else set()

all_stats = [existing_df] if not existing_df.empty else []

# Fetch data for players not yet in CSV
for i, player in enumerate(player_list):
    if player['id'] in fetched_ids:
        continue  # Skip already fetched players

    try:
        print(f"⏳ Fetching: {player['full_name']} ({i+1}/{len(player_list)})")
        stats = PlayerDashboardByYearOverYear(player_id=player['id'])
        season_data = stats.get_data_frames()[1]
        season_data['PLAYER_NAME'] = player['full_name']
        season_data['PLAYER_ID'] = player['id']
        all_stats.append(season_data)
        time.sleep(0.5)  # Respect API rate limit
    except Exception as e:
        print(f"❌ Failed for {player['full_name']}: {e}")
        continue

# Combine all data and save with utf-8-sig encoding for Power BI
if all_stats:
    final_df = pd.concat(all_stats, ignore_index=True)
    final_df.to_csv(csv_path, index=False, encoding='utf-8-sig')
    print(f"✅ Updated CSV saved: {csv_path} with {len(final_df)} rows")
else:
    print("⚠️ No new data fetched. CSV not changed.")


🔁 Loaded existing data: 3616 rows
Adding PLAYER_ID column to existing data...
⏳ Fetching: Taran Armstrong (15/572)
⏳ Fetching: Marcus Bagley (18/572)
⏳ Fetching: DaRon Holmes II (214/572)
⏳ Fetching: Patty Mills (343/572)
⏳ Fetching: Ethan Thompson (491/572)
⏳ Fetching: Nikola Topić (499/572)
✅ Updated CSV saved: nba_season_stats_all_players.csv with 3637 rows


  final_df = pd.concat(all_stats, ignore_index=True)
