In [1]:
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players
import pandas as pd
import time
from tqdm import tqdm
import logging
from datetime import datetime

class NBADataCollector:
    """
    Collects season-by-season career statistics for NBA players via nba_api.

    The collector throttles requests, pauses and resumes after request
    failures, saves partial progress to CSV, and can derive per-game
    averages from the season totals.
    """

    # Season-total columns that PerGameCalculator converts to per-game averages.
    COUNTING_STATS = (
        'MIN', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
        'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
    )

    def __init__(self):
        """
        Record the run's start timestamp and configure file logging.
        """
        # Taken once so every partial save of this run reuses the same filename.
        self.start_time = datetime.now().strftime("%Y%m%d_%H%M%S")

        logging.basicConfig(
            filename="nba_data_collection.log",
            level=logging.INFO,
            format="%(asctime)s - %(levelname)s - %(message)s"
        )

    def get_all_players(self):
        """
        Get a list of all NBA players in the database

        Returns an empty list (and logs the error) if the lookup fails.
        """
        try:
            return players.get_players()
        except Exception as e:
            logging.error(f"Error fetching players list: {str(e)}")
            return []

    def get_player_stats(self, player_id):
        """
        Get career stats broken down by season for a single player

        Returns the first DataFrame produced by the PlayerCareerStats endpoint.
        Any exception is logged and re-raised so the caller can decide
        whether to pause and retry.
        """
        try:
            time.sleep(0.6)  # rate limiting to not get timed out by the NBA.
            career = playercareerstats.PlayerCareerStats(
                player_id=player_id
            )
            return career.get_data_frames()[0]
        except Exception as e:
            logging.error(f"Timeout/Request error fetching stats for player ID {player_id}: {str(e)}")
            raise  # re-raise all exceptions to be handled by the caller

    def season_id_to_year(self, season_id):
        """
        Convert NBA API season ID to end year
        The NBA API's format for seasons is "YYYY-YY" ("1968-69", "2022-23")
        Returns the end year (1969, 2023), or None if the ID cannot be parsed.
        """
        try:
            start_year = int(season_id.split('-')[0])
        except (AttributeError, TypeError, ValueError):
            # Not a string, or its leading part is not an integer.
            return None
        return start_year + 1

    def save_data(self, data, filename=None):
        """
        Saves the collected data

        data: DataFrame to write as CSV (without the index).
        filename: Target path; defaults to a name stamped with today's date.
        """
        if filename is None:
            filename = f'nba_player_stats_{datetime.now().strftime("%Y%m%d")}.csv'

        data.to_csv(filename, index=False)
        logging.info(f"Data saved to {filename}")

    def _combine_stats(self, all_stats):
        """
        Concatenate per-player frames and sort them by player and season.
        """
        combined = pd.concat(all_stats, ignore_index=True)
        return combined.sort_values(["PLAYER_NAME", "SEASON_START_YEAR"])

    def _filter_player_seasons(self, player, player_stats, start_year):
        """
        Keep one player's seasons from start_year onwards and tag them with the name.

        Returns None when there are no stats or no season passes the filter.
        NOTE: the SEASON_START_YEAR column holds the value returned by
        season_id_to_year, i.e. the season's END year; the column name is
        kept unchanged so the output schema stays the same.
        """
        if player_stats is None:
            return None

        player_stats["SEASON_START_YEAR"] = player_stats["SEASON_ID"].apply(self.season_id_to_year)
        # .copy() so the PLAYER_NAME assignment below writes to an independent
        # frame instead of a slice of player_stats (avoids SettingWithCopyWarning).
        kept = player_stats[player_stats["SEASON_START_YEAR"] >= start_year].copy()
        if kept.empty:
            return None

        kept["PLAYER_NAME"] = player["full_name"]
        return kept

    def _save_partial(self, all_stats):
        """
        Write everything collected so far to this run's partial-progress CSV.
        """
        partial_name = f"temp_nba_player_stats_partial_{self.start_time}.csv"
        self.save_data(self._combine_stats(all_stats), filename=partial_name)
        print(f"Progress saved to {partial_name}")

    def collect_all_players_stats(self, start_year=1980, retry_delay=600, max_retries=None): # 600 secs = 10 minutes
        """
        Collect the stats for all NBA players since start_year
        Pauses and retries from the last player in case of timeout.
        Saves the progress incrementally whenever there's an error or timeout.

        start_year: Earliest season to include (defaults to 1980)
        retry_delay: Time it takes to retry in case of timeout.
        max_retries: Maximum number of consecutive failed attempts on the same
            player before the last exception is re-raised. None (the default)
            retries indefinitely.

        Returns the combined, sorted DataFrame, or None if nothing was collected.
        """
        all_players = self.get_all_players()
        total_players = len(all_players)
        logging.info(f"Found {total_players} players in total")

        all_stats = []
        current_index = 0
        consecutive_failures = 0

        while current_index < total_players:
            try:
                # The context manager closes the progress bar even when a request fails.
                with tqdm(all_players[current_index:], desc="Iterating through the players", initial=current_index, total=total_players) as progress:
                    for player in progress:
                        player_stats = self.get_player_stats(player["id"])
                        kept = self._filter_player_seasons(player, player_stats, start_year)
                        if kept is not None:
                            all_stats.append(kept)

                        current_index += 1
                        consecutive_failures = 0  # this player succeeded

                break # if loop finished without timeout

            except Exception as e:
                failed_name = all_players[current_index]['full_name']
                logging.error(f"Error at player index {current_index} ({failed_name}): {str(e)}")
                print(f"\nTimeout occurred at player: {failed_name}")
                print(f"Successfully processed {current_index} out of {total_players} players")

                if all_stats:  # saving the progress
                    self._save_partial(all_stats)

                consecutive_failures += 1
                if max_retries is not None and consecutive_failures > max_retries:
                    logging.error(f"Giving up after {consecutive_failures} consecutive failures at player index {current_index}")
                    raise

                print(f"Waiting {retry_delay/60} minutes before retrying...")
                time.sleep(retry_delay)
                print("Retrying collection...")

        if all_stats:
            return self._combine_stats(all_stats)
        return None

    def PerGameCalculator(self, df):
        """
        Calculates per-game statistics from season totals

        For every counting stat present in df, adds a "<STAT>_PER_GAME" column
        equal to the stat divided by GP, rounded to two decimals. Rows with
        GP == 0 get a missing value (NaN) rather than an infinite average.

        df: DataFrame of season totals containing a 'GP' column.
        Returns a new DataFrame; the input is not modified. An empty or None
        input is returned unchanged after logging a warning.
        """
        if df is None or df.empty:
            logging.warning("Empty DataFrame received in PerGameCalculator")
            return df

        df_per_game = df.copy() # creating a copy to avoid modifying the original DataFrame

        present_stats = [stat for stat in self.COUNTING_STATS if stat in df.columns]
        if not present_stats:
            return df_per_game

        # Mask zero games so the division yields NaN instead of inf.
        games_played = df['GP'].where(df['GP'] != 0)

        for stat in present_stats:
            df_per_game[f"{stat}_PER_GAME"] = (df[stat] / games_played).round(2)

        return df_per_game

def main():
    """
    Interactively collect NBA player stats, save them to CSV and print a summary.

    Prompts for the stats type (totals or per-game), collects every player's
    seasons from 1980 onwards, optionally derives per-game columns, drops the
    identifier columns, saves the result and prints a short overview.
    """
    collector = NBADataCollector()

    # Keep asking until the user enters one of the two supported choices.
    while True:
        per_mode = input("Enter stats type (1 for Totals, 2 for Per Game): ").strip()
        if per_mode in ("1", "2"):
            break
        print("Please enter 1 for Totals or 2 for Per Game.")

    per_mode = "Totals" if per_mode == "1" else "PerGame"

    logging.info(f"Starting data collection. Stat type = {per_mode}")
    all_stats = collector.collect_all_players_stats(start_year=1980)

    # collect_all_players_stats returns None when nothing was collected.
    if all_stats is None or all_stats.empty:
        logging.warning("No player statistics were collected; nothing to save")
        print("No player statistics were collected; nothing to save.")
        return

    if per_mode == "PerGame":
        all_stats = collector.PerGameCalculator(all_stats)

    all_stats = all_stats.drop(columns=["PLAYER_ID", "TEAM_ID", "LEAGUE_ID"], errors="ignore")

    collector.save_data(all_stats)

    # Values are computed outside the f-strings: reusing the same quote
    # character inside an f-string is a SyntaxError before Python 3.12.
    player_count = len(all_stats["PLAYER_NAME"].unique())
    first_year = all_stats["SEASON_START_YEAR"].min()
    last_year = all_stats["SEASON_START_YEAR"].max()

    print(f"Successfully collected data for {player_count} players")
    print(f"Total seasons recorded: {len(all_stats)}")
    print(f"Year range: {first_year} - {last_year}")

    print("\nAvailable statistics:")
    print(all_stats.columns.tolist())

    print("\nExample data:")
    print(all_stats.head())


        

# Run the interactive collection only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Enter stats type (1 for Totals, 2 for Per Game):  2


Iterating through the players:   1%|▎                                              | 27/5011 [01:41<5:13:23,  3.77s/it]



Timeout occurred at player: Jeff Adrien
Successfully processed 27 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  12%|█████▌                                        | 602/5011 [17:07<2:11:17,  1.79s/it]



Timeout occurred at player: Kobe Bufkin
Successfully processed 602 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  12%|██████▋                                                 | 602/5011 [00:22<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Kobe Bufkin
Successfully processed 602 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  12%|██████▋                                                 | 602/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Kobe Bufkin
Successfully processed 602 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  12%|██████▋                                                 | 602/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Kobe Bufkin
Successfully processed 602 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  14%|██████▋                                       | 722/5011 [03:58<2:22:17,  1.99s/it]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jay Carty
Successfully processed 722 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  14%|████████                                                | 722/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jay Carty
Successfully processed 722 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  14%|████████                                                | 722/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jay Carty
Successfully processed 722 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  14%|████████                                                | 722/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jay Carty
Successfully processed 722 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  14%|████████                                                | 722/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jay Carty
Successfully processed 722 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  26%|███████████▉                                 | 1324/5011 [18:33<1:53:41,  1.85s/it]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jordan Farmar
Successfully processed 1324 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  26%|██████████████▌                                        | 1324/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jordan Farmar
Successfully processed 1324 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  26%|██████████████▌                                        | 1324/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jordan Farmar
Successfully processed 1324 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  26%|██████████████▌                                        | 1324/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Jordan Farmar
Successfully processed 1324 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  29%|████████████▉                                | 1444/5011 [04:03<2:00:46,  2.03s/it]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Enes Freedom
Successfully processed 1444 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  29%|███████████████▊                                       | 1444/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Enes Freedom
Successfully processed 1444 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  29%|███████████████▊                                       | 1444/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Enes Freedom
Successfully processed 1444 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  29%|███████████████▊                                       | 1444/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Enes Freedom
Successfully processed 1444 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  29%|███████████████▊                                       | 1444/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Enes Freedom
Successfully processed 1444 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  29%|███████████████▊                                       | 1444/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Enes Freedom
Successfully processed 1444 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  41%|██████████████████▎                          | 2046/5011 [18:42<1:32:07,  1.86s/it]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Lou Hudson
Successfully processed 2046 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  41%|██████████████████████▍                                | 2046/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: Lou Hudson
Successfully processed 2046 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  41%|██████████████████████▍                                | 2046/5011 [00:30<?, ?it/s]



Timeout occurred at player: Lou Hudson
Successfully processed 2046 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  41%|██████████████████████▍                                | 2046/5011 [00:30<?, ?it/s]



Timeout occurred at player: Lou Hudson
Successfully processed 2046 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  43%|███████████████████▍                         | 2166/5011 [04:08<1:38:20,  2.07s/it]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: John Janisch
Successfully processed 2166 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  43%|███████████████████████▊                               | 2166/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: John Janisch
Successfully processed 2166 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  43%|███████████████████████▊                               | 2166/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: John Janisch
Successfully processed 2166 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  43%|███████████████████████▊                               | 2166/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: John Janisch
Successfully processed 2166 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  43%|███████████████████████▊                               | 2166/5011 [00:30<?, ?it/s]



Timeout occurred at player: John Janisch
Successfully processed 2166 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  43%|███████████████████████▊                               | 2166/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: John Janisch
Successfully processed 2166 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  55%|████████████████████████▊                    | 2768/5011 [19:08<1:11:20,  1.91s/it]



Timeout occurred at player: Terance Mann
Successfully processed 2768 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  55%|██████████████████████████████▍                        | 2768/5011 [00:30<?, ?it/s]



Timeout occurred at player: Terance Mann
Successfully processed 2768 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  55%|██████████████████████████████▍                        | 2768/5011 [00:30<?, ?it/s]



Timeout occurred at player: Terance Mann
Successfully processed 2768 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  55%|██████████████████████████████▍                        | 2768/5011 [00:30<?, ?it/s]



Timeout occurred at player: Terance Mann
Successfully processed 2768 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  58%|█████████████████████████▉                   | 2888/5011 [04:15<1:15:11,  2.13s/it]



Timeout occurred at player: John McConathy
Successfully processed 2888 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  58%|███████████████████████████████▋                       | 2888/5011 [00:30<?, ?it/s]



Timeout occurred at player: John McConathy
Successfully processed 2888 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  58%|███████████████████████████████▋                       | 2888/5011 [00:30<?, ?it/s]



Timeout occurred at player: John McConathy
Successfully processed 2888 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  58%|███████████████████████████████▋                       | 2888/5011 [00:30<?, ?it/s]



Timeout occurred at player: John McConathy
Successfully processed 2888 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  58%|███████████████████████████████▋                       | 2888/5011 [00:30<?, ?it/s]



Timeout occurred at player: John McConathy
Successfully processed 2888 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  58%|███████████████████████████████▋                       | 2888/5011 [00:30<?, ?it/s]
  temp_df = pd.concat(all_stats, ignore_index=True)



Timeout occurred at player: John McConathy
Successfully processed 2888 out of 5011 players
Waiting 10.0 minutes before retrying...
Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  70%|████████████████████████████████▋              | 3490/5011 [18:35<46:57,  1.85s/it]



Timeout occurred at player: Reggie Perry
Successfully processed 3490 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  70%|██████████████████████████████████████▎                | 3490/5011 [00:30<?, ?it/s]



Timeout occurred at player: Reggie Perry
Successfully processed 3490 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  70%|██████████████████████████████████████▎                | 3490/5011 [00:30<?, ?it/s]



Timeout occurred at player: Reggie Perry
Successfully processed 3490 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  70%|██████████████████████████████████████▎                | 3490/5011 [00:30<?, ?it/s]



Timeout occurred at player: Reggie Perry
Successfully processed 3490 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  72%|█████████████████████████████████▊             | 3610/5011 [03:59<46:32,  1.99s/it]



Timeout occurred at player: Gabe Pruitt
Successfully processed 3610 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  72%|███████████████████████████████████████▌               | 3610/5011 [00:30<?, ?it/s]



Timeout occurred at player: Gabe Pruitt
Successfully processed 3610 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  72%|███████████████████████████████████████▌               | 3610/5011 [00:30<?, ?it/s]



Timeout occurred at player: Gabe Pruitt
Successfully processed 3610 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  72%|███████████████████████████████████████▌               | 3610/5011 [00:30<?, ?it/s]



Timeout occurred at player: Gabe Pruitt
Successfully processed 3610 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  72%|███████████████████████████████████████▌               | 3610/5011 [00:30<?, ?it/s]



Timeout occurred at player: Gabe Pruitt
Successfully processed 3610 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  72%|███████████████████████████████████████▌               | 3610/5011 [00:30<?, ?it/s]



Timeout occurred at player: Gabe Pruitt
Successfully processed 3610 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  83%|██████████████████████████████████████▊        | 4137/5011 [15:37<25:55,  1.78s/it]



Timeout occurred at player: Jim Smith
Successfully processed 4137 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  83%|█████████████████████████████████████▏       | 4146/5011 [00:49<1:20:00,  5.55s/it]



Timeout occurred at player: Michael Smith
Successfully processed 4146 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  84%|███████████████████████████████████████▌       | 4212/5011 [02:17<27:45,  2.08s/it]



Timeout occurred at player: Ryan Stack
Successfully processed 4212 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  84%|██████████████████████████████████████████████▏        | 4212/5011 [00:30<?, ?it/s]



Timeout occurred at player: Ryan Stack
Successfully processed 4212 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  84%|██████████████████████████████████████████████▏        | 4212/5011 [00:22<?, ?it/s]



Timeout occurred at player: Ryan Stack
Successfully processed 4212 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  86%|████████████████████████████████████████▋      | 4332/5011 [03:40<20:46,  1.84s/it]



Timeout occurred at player: Johnny Taylor
Successfully processed 4332 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  86%|███████████████████████████████████████████████▌       | 4332/5011 [00:30<?, ?it/s]



Timeout occurred at player: Johnny Taylor
Successfully processed 4332 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  86%|███████████████████████████████████████████████▌       | 4332/5011 [00:30<?, ?it/s]



Timeout occurred at player: Johnny Taylor
Successfully processed 4332 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  86%|███████████████████████████████████████████████▌       | 4332/5011 [00:30<?, ?it/s]



Timeout occurred at player: Johnny Taylor
Successfully processed 4332 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  86%|███████████████████████████████████████████████▌       | 4332/5011 [00:30<?, ?it/s]



Timeout occurred at player: Johnny Taylor
Successfully processed 4332 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  98%|██████████████████████████████████████████████▎| 4934/5011 [19:35<02:30,  1.95s/it]



Timeout occurred at player: Randy Woods
Successfully processed 4934 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  98%|██████████████████████████████████████████████████████▏| 4934/5011 [00:30<?, ?it/s]



Timeout occurred at player: Randy Woods
Successfully processed 4934 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  98%|██████████████████████████████████████████████████████▏| 4934/5011 [00:30<?, ?it/s]



Timeout occurred at player: Randy Woods
Successfully processed 4934 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players:  98%|██████████████████████████████████████████████████████▏| 4934/5011 [00:30<?, ?it/s]



Timeout occurred at player: Randy Woods
Successfully processed 4934 out of 5011 players
Waiting 10.0 minutes before retrying...


  temp_df = pd.concat(all_stats, ignore_index=True)


Progress saved to temp_nba_player_stats_partial_20250303_145947.csv
Retrying collection...


Iterating through the players: 100%|███████████████████████████████████████████████| 5011/5011 [02:14<00:00,  1.74s/it]
  combined_stats = pd.concat(all_stats, ignore_index=True)


Successfully collected data for 3671 players
Total seasons recorded: 24552
Year range: 1980 - 2025

Available statistics:
['SEASON_ID', 'TEAM_ABBREVIATION', 'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'SEASON_START_YEAR', 'PLAYER_NAME', 'MIN_PER_GAME', 'FGM_PER_GAME', 'FGA_PER_GAME', 'FG3M_PER_GAME', 'FG3A_PER_GAME', 'FTM_PER_GAME', 'FTA_PER_GAME', 'OREB_PER_GAME', 'DREB_PER_GAME', 'REB_PER_GAME', 'AST_PER_GAME', 'STL_PER_GAME', 'BLK_PER_GAME', 'TOV_PER_GAME', 'PF_PER_GAME', 'PTS_PER_GAME']

Example data:
     SEASON_ID TEAM_ABBREVIATION  PLAYER_AGE  GP    GS     MIN  FGM  FGA  \
8366   1985-86               LAL        22.0  82   1.0  1542.0  209  388   
8367   1986-87               LAL        23.0  79  72.0  2240.0  316  587   
8368   1987-88               LAL        24.0  82  64.0  2636.0  322  640   
8369   1988-89               LAL        25.0  82  82.0  2