In [2]:
import pandas as pd
from nba_api.stats.endpoints import commonteamroster, leaguestandings
from nba_api.stats.static import teams
import time
from datetime import datetime

def get_season_string(year):
    """Build the NBA season label for the season that starts in ``year``.

    The label is the start year, a hyphen, and the last two characters of the
    following year, e.g. ``2000`` -> ``'2000-01'``.
    """
    following_year = str(year + 1)
    suffix = following_year[-2:]
    return "{}-{}".format(year, suffix)



def get_team_roster(team_id, season, delay=0.6):
    """Fetch the roster of one team for one season.

    Args:
        team_id: Team identifier passed to the CommonTeamRoster endpoint.
        season: Season string passed to the endpoint (e.g. '2010-11').
        delay: Seconds to pause after the request for rate limiting. The
            pause is applied whether the request succeeded or failed, so a
            run of failing requests is still spaced out.

    Returns:
        The first data frame returned by the endpoint, or an empty
        DataFrame if the request or the conversion raised. Failures are
        reported on stdout and never propagated (best-effort fetch).
    """
    try:
        roster = commonteamroster.CommonTeamRoster(
            team_id=team_id,
            season=season
        )
        return roster.get_data_frames()[0]
    except Exception as e:
        # Deliberately broad: any failure is reported and turned into
        # "no data" so one bad request does not abort a long collection run.
        print(f"Error fetching roster for team {team_id} in {season}: {e}")
        return pd.DataFrame()
    finally:
        # Rate limiting must also happen on the error path; otherwise
        # failed requests are fired back to back.
        time.sleep(delay)

def calculate_roster_metrics(current_roster, previous_roster):
    """Calculate roster construction metrics for one team-season.

    Args:
        current_roster: DataFrame with a 'PLAYER_ID' column and, optionally,
            'AGE' and 'EXP' columns.
        previous_roster: DataFrame with a 'PLAYER_ID' column for the
            baseline roster. May be empty (or None) when no baseline exists.

    Returns:
        None if ``current_roster`` is empty, otherwise a dict with the keys
        'roster_size', 'retained_players', 'new_players',
        'departed_players', 'continuity_pct', 'avg_age', 'median_age',
        'oldest_player', 'youngest_player', 'avg_experience' and
        'rookies_count'. A value of None means the metric is not available:
        'continuity_pct' when there is no baseline roster, the age metrics
        when 'AGE' is missing, and the experience metrics when 'EXP' is
        missing.
    """
    if current_roster.empty:
        return None

    # Unique player IDs on each roster
    current_players = set(current_roster['PLAYER_ID'].values)
    if previous_roster is None or previous_roster.empty:
        previous_players = set()
    else:
        previous_players = set(previous_roster['PLAYER_ID'].values)

    # Continuity counts (relative to an empty baseline when none exists)
    retained_players = len(current_players & previous_players)
    new_players = len(current_players - previous_players)
    departed_players = len(previous_players - current_players)

    # Share of the previous roster that is still on the team. Without a
    # baseline the ratio is undefined; report None rather than 0 so it is
    # not mistaken for a complete roster turnover.
    if previous_players:
        continuity_pct = retained_players / len(previous_players) * 100
    else:
        continuity_pct = None

    # Age metrics (non-numeric ages are coerced to NaN and ignored)
    if 'AGE' in current_roster.columns:
        ages = pd.to_numeric(current_roster['AGE'], errors='coerce')
        avg_age = ages.mean()
        median_age = ages.median()
        oldest_player = ages.max()
        youngest_player = ages.min()
    else:
        avg_age = None
        median_age = None
        oldest_player = None
        youngest_player = None

    # Roster size counts unique player IDs
    roster_size = len(current_players)

    # Experience metrics: 'R' marks a rookie and counts as 0 years
    if 'EXP' in current_roster.columns:
        exp_values = pd.to_numeric(current_roster['EXP'].replace('R', '0'), errors='coerce')
        avg_experience = exp_values.mean()
        rookies_count = (current_roster['EXP'] == 'R').sum()
    else:
        avg_experience = None
        rookies_count = None

    return {
        'roster_size': roster_size,
        'retained_players': retained_players,
        'new_players': new_players,
        'departed_players': departed_players,
        'continuity_pct': continuity_pct,
        'avg_age': avg_age,
        'median_age': median_age,
        'oldest_player': oldest_player,
        'youngest_player': youngest_player,
        'avg_experience': avg_experience,
        'rookies_count': rookies_count
    }

def collect_roster_construction_data(start_year=2010, end_year=2024):
    """
    Collect roster construction data for all teams from start_year to end_year.

    Rosters are tracked by team_id, so season-to-season continuity does not
    depend on the team's name. Note that 'team_name' is always the name from
    the static team list, not the name the franchise used in that season.

    Args:
        start_year: Starting year (default 2010 for 2010-11 season)
        end_year: Ending year (default 2024 for 2024-25 season)

    Returns:
        DataFrame with one row per team-season for which a roster was
        fetched, with columns in a fixed order. If nothing could be fetched
        the frame is empty but still has those columns.
    """
    # Teams from the nba_api static team list
    all_teams = teams.get_teams()

    # Fixed output column order for readability
    column_order = [
        'season', 'team_name', 'team_id', 'roster_size',
        'retained_players', 'new_players', 'departed_players', 'continuity_pct',
        'avg_age', 'median_age', 'oldest_player', 'youngest_player',
        'avg_experience', 'rookies_count'
    ]

    # Storage for results
    all_data = []

    # Storage for previous season rosters by team_id
    previous_rosters = {}

    print(f"Collecting data from {start_year}-{start_year+1} to {end_year}-{end_year+1}")
    print(f"Total teams: {len(all_teams)}")
    print("-" * 60)

    for year in range(start_year, end_year + 1):
        season = get_season_string(year)
        print(f"\nProcessing season: {season}")

        for team in all_teams:
            team_id = team['id']
            team_name = team['full_name']

            print(f"  Fetching {team_name}...", end=" ")

            # Get current season roster
            current_roster = get_team_roster(team_id, season)

            if current_roster.empty:
                print("No data")
                # Forget the older roster: otherwise next season's continuity
                # would be measured against a roster from two seasons back.
                previous_rosters.pop(team_id, None)
                continue

            # Previous season roster for continuity calculations; an empty
            # frame means there is no baseline for this team
            previous_roster = previous_rosters.get(team_id, pd.DataFrame())

            # Calculate metrics
            metrics = calculate_roster_metrics(current_roster, previous_roster)

            if metrics:
                metrics.update({
                    'season': season,
                    'team_id': team_id,
                    'team_name': team_name
                })
                all_data.append(metrics)
                print("✓")
            else:
                print("Failed")

            # Store current roster for next year's continuity calculation
            previous_rosters[team_id] = current_roster

    # Passing the column list to the constructor fixes the order and keeps
    # the result valid (empty, with columns) when no rows were collected;
    # selecting columns afterwards would raise KeyError in that case.
    return pd.DataFrame(all_data, columns=column_order)

# Script entry point: collect the data, save it, and print a short preview
if __name__ == "__main__":
    print("Starting NBA Roster Construction Data Collection", "=" * 60, sep="\n")

    # Seasons 2010-11 through 2024-25
    roster_data = collect_roster_construction_data(start_year=2010, end_year=2024)

    # Write the results to CSV without the index column
    output_file = 'nba_roster_construction_2010_2024.csv'
    roster_data.to_csv(output_file, index=False)

    # Completion banner
    print(
        f"\n{'=' * 60}",
        "Data collection complete!",
        f"Total records: {len(roster_data)}",
        f"Saved to: {output_file}",
        sep="\n",
    )

    # Summary statistics of the numeric columns
    print("\nSummary Statistics:")
    summary = roster_data.describe()
    print(summary)

    # First five rows as a sample
    print("\nSample data (first 5 rows):")
    print(roster_data.head())

Starting NBA Roster Construction Data Collection
Collecting data from 2010-2011 to 2024-2025
Total teams: 30
------------------------------------------------------------

Processing season: 2010-11
  Fetching Atlanta Hawks... ✓
  Fetching Boston Celtics... ✓
  Fetching Cleveland Cavaliers... ✓
  Fetching New Orleans Pelicans... ✓
  Fetching Chicago Bulls... ✓
  Fetching Dallas Mavericks... ✓
  Fetching Denver Nuggets... ✓
  Fetching Golden State Warriors... ✓
  Fetching Houston Rockets... ✓
  Fetching Los Angeles Clippers... ✓
  Fetching Los Angeles Lakers... ✓
  Fetching Miami Heat... ✓
  Fetching Milwaukee Bucks... ✓
  Fetching Minnesota Timberwolves... ✓
  Fetching Brooklyn Nets... ✓
  Fetching New York Knicks... ✓
  Fetching Orlando Magic... ✓
  Fetching Indiana Pacers... ✓
  Fetching Philadelphia 76ers... ✓
  Fetching Phoenix Suns... ✓
  Fetching Portland Trail Blazers... ✓
  Fetching Sacramento Kings... ✓
  Fetching San Antonio Spurs... ✓
  Fetching Oklahoma City Thunder... ✓
  F