In [5]:
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta

from nba_api.stats import endpoints
from nba_api.stats.static import teams, players
import requests
from basketball_reference_scraper import players as br_players
from basketball_reference_scraper import teams as br_teams

In [None]:
class BasketballDataRetriever:
    """Fetch NBA statistics through ``nba_api`` and return them as DataFrames.

    Every public method prints progress messages. On failure the error is
    printed and ``None`` is returned (``False`` for the connection test)
    instead of raising, so callers must check the result before using it.
    """

    # Advanced metrics copied onto the per-game table, in output order.
    # The advanced measure type names the ratings OFF_RATING / DEF_RATING;
    # the ORTG / DRTG aliases are kept so a frame that uses the short names
    # is still picked up.
    ADVANCED_METRIC_COLUMNS = (
        'USG_PCT', 'TS_PCT', 'PIE', 'PACE',
        'OFF_RATING', 'DEF_RATING', 'ORTG', 'DRTG',
        'NET_RATING',
    )

    def __init__(self):
        # List of team dicts as returned by nba_api's static teams module.
        self.nba_teams = teams.get_teams()
        # Browser-like User-Agent. Stored for callers; no request made by
        # this class passes it on.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    @staticmethod
    def _merge_advanced_metrics(basic_stats, advanced_df):
        """Left-join the available advanced metrics onto ``basic_stats``.

        Args:
            basic_stats: DataFrame with a ``PLAYER_ID`` column.
            advanced_df: DataFrame with a ``PLAYER_ID`` column and any subset
                of ``ADVANCED_METRIC_COLUMNS``.

        Returns:
            A new DataFrame holding every row of ``basic_stats`` plus the
            advanced metric columns that exist in ``advanced_df``. Players
            missing from ``advanced_df`` get NaN in those columns.
        """
        wanted = BasketballDataRetriever.ADVANCED_METRIC_COLUMNS
        keep = ['PLAYER_ID'] + [col for col in wanted if col in advanced_df.columns]
        return pd.merge(basic_stats, advanced_df[keep], on='PLAYER_ID', how='left')

    def get_nba_player_stats(self, season='2024-25', season_type='Regular Season'):
        """Retrieve per-game player statistics enriched with advanced metrics.

        Args:
            season: Season string passed to the endpoint, e.g. ``'2024-25'``.
            season_type: Season type passed to the endpoint.

        Returns:
            The per-game stats DataFrame with advanced metric columns merged
            in. If only the advanced request or merge fails, the basic stats
            are returned alone. If the basic request fails, the result of
            ``_get_player_stats_alternative`` is returned, which may be
            ``None``.
        """
        try:
            print(f"Fetching NBA player stats for {season}...")

            # Basic per-game box-score stats.
            player_stats = endpoints.LeagueDashPlayerStats(
                season=season,
                season_type_all_star=season_type,
                per_mode_detailed='PerGame'
            )
            basic_stats = player_stats.get_data_frames()[0]
            print(f"Retrieved basic stats for {len(basic_stats)} players")

            time.sleep(1)  # Rate limiting

            # Advanced stats are best-effort: failure here must not discard
            # the basic stats that were already fetched.
            try:
                advanced_stats = endpoints.LeagueDashPlayerStats(
                    season=season,
                    season_type_all_star=season_type,
                    measure_type_detailed_defense='Advanced'
                )
                advanced_df = advanced_stats.get_data_frames()[0]
                merged_stats = self._merge_advanced_metrics(basic_stats, advanced_df)
                print("Successfully merged advanced stats")

            except Exception as e:
                print(f"Could not retrieve advanced stats: {e}")
                print("Proceeding with basic stats only...")
                merged_stats = basic_stats

            time.sleep(1)  # Rate limiting
            return merged_stats

        except Exception as e:
            print(f"Error retrieving NBA player stats: {e}")
            # Try alternative approach
            return self._get_player_stats_alternative(season, season_type)

    def _get_player_stats_alternative(self, season='2024-25', season_type='Regular Season',
                                      max_players=100):
        """Fallback retrieval that queries players one at a time.

        Args:
            season: Season string passed to the endpoints.
            season_type: Season type passed to the endpoints.
            max_players: How many entries from the start of the active-player
                list to query, to limit the number of requests. ``None``
                queries every active player.

        Returns:
            The concatenated first data frame of each player's dashboard, the
            league-leaders frame when no active players are listed, or
            ``None`` when nothing could be retrieved.
        """
        try:
            print("Trying alternative data retrieval method...")

            all_players = players.get_active_players()

            if not all_players:
                print("No active players found, using different approach...")
                # Try league leaders as a proxy
                leaders = endpoints.LeagueLeaders(
                    season=season,
                    season_type_all_star=season_type
                )
                return leaders.get_data_frames()[0]

            player_data = []
            for i, player in enumerate(all_players[:max_players]):
                try:
                    player_stats = endpoints.PlayerDashboardByGeneralSplits(
                        player_id=player['id'],
                        season=season,
                        season_type_all_star=season_type
                    )

                    player_df = player_stats.get_data_frames()[0]  # Overall stats
                    if not player_df.empty:
                        player_data.append(player_df)

                    time.sleep(0.6)  # Rate limiting between per-player requests

                    if (i + 1) % 10 == 0:
                        print(f"Processed {i + 1} players...")

                except Exception as player_error:
                    # One failing player must not abort the whole batch.
                    print(f"Error for player {player['full_name']}: {player_error}")
                    continue

            if not player_data:
                return None

            combined_stats = pd.concat(player_data, ignore_index=True)
            print(f"Retrieved stats for {len(combined_stats)} players using alternative method")
            return combined_stats

        except Exception as e:
            print(f"Alternative method also failed: {e}")
            return None

    def get_team_stats(self, league='NBA', season='2024-25', season_type='Regular Season'):
        """Get team-level statistics.

        Args:
            league: Only ``'NBA'`` is implemented; any other value prints a
                notice and returns ``None``.
            season: Season string passed to the endpoint.
            season_type: Season type passed to the endpoint.

        Returns:
            The team stats DataFrame, or ``None`` on error or for an
            unsupported league.
        """
        try:
            print(f"Fetching {league} team stats for {season}...")

            if league != 'NBA':
                # WNBA team stats would need different approach
                print("WNBA team stats not implemented yet")
                return None

            team_stats = endpoints.LeagueDashTeamStats(
                season=season,
                season_type_all_star=season_type
            )
            team_df = team_stats.get_data_frames()[0]
            print(f"Retrieved stats for {len(team_df)} teams")
            return team_df

        except Exception as e:
            print(f"Error retrieving team stats: {e}")
            return None

    def get_game_logs(self, player_id, season='2024-25', season_type='Regular Season'):
        """Get game-by-game logs for one player.

        Args:
            player_id: Player identifier passed to the endpoint.
            season: Season string passed to the endpoint.
            season_type: Season type passed to the endpoint.

        Returns:
            The game-log DataFrame, or ``None`` on error.
        """
        try:
            print(f"Fetching game logs for player {player_id}...")

            game_logs = endpoints.PlayerGameLog(
                player_id=player_id,
                season=season,
                season_type_all_star=season_type
            )
            game_df = game_logs.get_data_frames()[0]
            print(f"Retrieved {len(game_df)} games for player {player_id}")
            return game_df

        except Exception as e:
            print(f"Error retrieving game logs for player {player_id}: {e}")
            return None

    def get_lineup_data(self, team_id, season='2024-25', season_type='Regular Season'):
        """Get lineup statistics for one team.

        Args:
            team_id: Team identifier passed to the endpoint.
            season: Season string passed to the endpoint.
            season_type: Season type passed to the endpoint.

        Returns:
            The first data frame of the lineup dashboard, or ``None`` on
            error.
        """
        try:
            print(f"Fetching lineup data for team {team_id}...")

            lineups = endpoints.TeamDashLineups(
                team_id=team_id,
                season=season,
                season_type_all_star=season_type
            )
            lineup_df = lineups.get_data_frames()[0]
            print(f"Retrieved {len(lineup_df)} lineup combinations")
            return lineup_df

        except Exception as e:
            print(f"Error retrieving lineup data for team {team_id}: {e}")
            return None

    def get_league_leaders(self, season='2024-25', stat_category='PTS',
                           season_type='Regular Season'):
        """Get league leaders for one statistical category.

        Args:
            season: Season string passed to the endpoint.
            stat_category: Stat category abbreviation, e.g. ``'PTS'``.
            season_type: Season type passed to the endpoint.

        Returns:
            The leaders DataFrame, or ``None`` on error.
        """
        try:
            print(f"Fetching league leaders in {stat_category}...")

            leaders = endpoints.LeagueLeaders(
                season=season,
                season_type_all_star=season_type,
                stat_category_abbreviation=stat_category
            )
            leaders_df = leaders.get_data_frames()[0]
            print(f"Retrieved top {len(leaders_df)} players in {stat_category}")
            return leaders_df

        except Exception as e:
            print(f"Error retrieving league leaders: {e}")
            return None

    def get_player_info_by_name(self, player_name):
        """Look up a player by full or partial name.

        Args:
            player_name: Name to search for. Blank or missing names are
                rejected up front, because an empty search pattern would
                otherwise match an arbitrary first player.

        Returns:
            The first matching player dict, or ``None`` when nothing matches,
            the name is blank, or the lookup raises.
        """
        if not player_name or not str(player_name).strip():
            print("No player name provided")
            return None

        try:
            player_list = players.find_players_by_full_name(player_name)
            if player_list:
                return player_list[0]  # Return first match

            # Fall back to a case-insensitive substring search over active
            # players only.
            needle = player_name.lower()
            all_players = players.get_active_players()
            matches = [p for p in all_players if needle in p['full_name'].lower()]
            return matches[0] if matches else None

        except Exception as e:
            print(f"Error finding player {player_name}: {e}")
            return None

    def test_api_connection(self):
        """Check that the team list and a live endpoint both respond.

        Returns:
            ``True`` when the league-leaders endpoint returns data, ``False``
            when either step raises.
        """
        try:
            print("Testing NBA API connection...")

            # NOTE(review): teams.get_teams() appears to read data bundled
            # with nba_api rather than the network - confirm. The endpoint
            # call below is the real connectivity check.
            test_teams = teams.get_teams()
            print(f"API connection successful - found {len(test_teams)} teams")

            try:
                leaders = endpoints.LeagueLeaders(season='2024-25')
                test_data = leaders.get_data_frames()[0]
                print(f"Endpoints working - retrieved {len(test_data)} league leaders")
                return True

            except Exception as endpoint_error:
                print(f"API connected but endpoints may have issues: {endpoint_error}")
                return False

        except Exception as e:
            print(f"API connection failed: {e}")
            return False

# Usage example: verify connectivity, then download player and team stats
# into CSV files under ./data.
if __name__ == "__main__":
    import os

    # Output locations. The literal paths are kept so existing consumers of
    # these files keep working.
    output_dir = './data'
    player_csv = './data/nba_player_stats_2024_25.csv'
    leaders_csv = './data/nba_league_leaders_2024_25.csv'
    team_csv = './data/nba_team_stats_2024_25.csv'

    retriever = BasketballDataRetriever()

    # Test API connection first
    if not retriever.test_api_connection():
        print("API connection failed. Please check your internet connection and try again.")
        # SystemExit works everywhere; the bare exit() helper is only
        # injected by the site module for interactive use.
        raise SystemExit(1)

    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the target directory exists before any file is written.
    os.makedirs(output_dir, exist_ok=True)

    print("\n" + "="*50)
    print("STARTING DATA RETRIEVAL")
    print("="*50)

    # Try to get NBA player stats
    print("\n1. Retrieving NBA Player Stats...")
    nba_stats = retriever.get_nba_player_stats(season='2024-25')

    if nba_stats is not None and not nba_stats.empty:
        print(f"Success: Retrieved {len(nba_stats)} NBA player records")
        print(f"Columns available: {list(nba_stats.columns)}")

        # Save the data
        nba_stats.to_csv(player_csv, index=False)
        print(f"Data saved to '{player_csv}'")

        # Show sample data. The fallback retrieval path can return a frame
        # without some of these columns, so only select the ones present.
        sample_cols = [
            col for col in ['PLAYER_NAME', 'TEAM_ABBREVIATION', 'PTS', 'AST', 'REB']
            if col in nba_stats.columns
        ]
        print("\nSample data:")
        print((nba_stats[sample_cols] if sample_cols else nba_stats).head())

    else:
        print("Failed to retrieve NBA player stats")

        # Try league leaders as fallback
        print("\nTrying league leaders as fallback...")
        leaders = retriever.get_league_leaders()
        if leaders is not None:
            print(f"Fallback successful: Retrieved {len(leaders)} league leaders")
            leaders.to_csv(leaders_csv, index=False)
        else:
            print("Fallback also failed")

    # Try to get team stats
    print("\n2. Retrieving Team Stats...")
    team_stats = retriever.get_team_stats()

    if team_stats is not None and not team_stats.empty:
        print(f"Success: Retrieved {len(team_stats)} NBA team records")
        team_stats.to_csv(team_csv, index=False)
        print(f"Data saved to '{team_csv}'")
    else:
        print("Failed to retrieve team stats")

    print("\n" + "="*50)
    print("DATA RETRIEVAL COMPLETE")
    print("="*50)

Testing NBA API connection...
API connection successful - found 30 teams
Endpoints working - retrieved 569 league leaders

STARTING DATA RETRIEVAL

1. Retrieving NBA Player Stats...
Fetching NBA player stats for 2024-25...
Retrieved basic stats for 569 players
Successfully merged advanced stats
Success: Retrieved 569 NBA player records
Columns available: ['PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3', 'WNBA_FANTASY_PTS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK',