## Daily Fetch
* This appends all new games, up to the present, to each player's existing individual dataset.
* To speed up the script, specify how many games back to search when pulling data for active players.
* **This number should be at least the number of games that have been played since the script was last run.**

In [48]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import pprint
import re 
from dateutil import parser
import time
from datetime import date, datetime, timedelta
from nba_api.stats.endpoints import playergamelog, commonallplayers
import os

In [60]:
import pandas as pd
from nba_api.stats.endpoints import playergamelog

def get_complete_career_gamelog(player_id, seasons=None):
    """
    Fetches a player's NBA game logs for a set of seasons and returns them as one DataFrame.

    Despite the name, the default season window is 2020-21 through 2023-24, not the
    player's whole career. Pass `seasons` explicitly to cover a different span
    (for example only the current season, or seasons before 2020-21).

    Parameters:
    - player_id (str): The NBA player's ID (e.g., '201939' for Stephen Curry)
    - seasons (iterable of str or str, optional): Seasons in 'YYYY-YY' format
      (e.g., ['2022-23', '2023-24']). A single string is treated as one season.
      Defaults to 2020-21 through 2023-24.

    Returns:
    - DataFrame: The game logs of every requested season that returned rows, with an
      added 'SEASON' column holding the season string. An empty DataFrame (no columns)
      is returned when no season produced any rows.

    Errors raised while fetching a season are printed and that season is skipped,
    so one failing request does not discard the seasons that succeeded.
    """
    if seasons is None:
        # Historical default of this notebook: 2020-21 .. 2023-24.
        seasons = [f'{year}-{str(year + 1)[-2:]}' for year in range(2020, 2024)]
    elif isinstance(seasons, str):
        # A bare string would otherwise be iterated character by character.
        seasons = [seasons]

    all_seasons_gamelogs = []

    for season in seasons:
        try:
            # The first result frame of the endpoint holds the per-game rows.
            player_gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season)
            season_gamelog_df = player_gamelog.get_data_frames()[0]
        except Exception as e:
            # Best effort: report the failure and keep going with the other seasons.
            print(f"Error fetching data for season {season}: {e}")
            continue

        if not season_gamelog_df.empty:
            season_gamelog_df['SEASON'] = season  # Add a column to track the season
            all_seasons_gamelogs.append(season_gamelog_df)

    # pd.concat raises on an empty list, so handle "nothing found" explicitly.
    if not all_seasons_gamelogs:
        return pd.DataFrame()

    return pd.concat(all_seasons_gamelogs, ignore_index=True)

# # Example usage
# player_id = '201939'  # Stephen Curry's player ID as an example
# career_gamelog = get_complete_career_gamelog(player_id).head()

# # Display the career game log DataFrame
# print(career_gamelog)

## Get Active Players

In [None]:
def get_active_players():
    """
    Downloads the current-season player list and keeps only players on a roster.

    Returns:
    - DataFrame: One row per player whose 'ROSTERSTATUS' equals 1, restricted to the
      identifying columns (person id, display name, team id/name, from/to year, slug).
    """
    wanted_columns = [
        'PERSON_ID', 'DISPLAY_FIRST_LAST', 'TEAM_ID', 'TEAM_NAME',
        'FROM_YEAR', 'TO_YEAR', 'PLAYER_SLUG',
    ]

    # The first result frame of the endpoint is the player table.
    endpoint = commonallplayers.CommonAllPlayers(is_only_current_season=1)
    roster = endpoint.get_data_frames()[0]

    # Keep rostered players only, then trim to the columns of interest.
    on_roster = roster['ROSTERSTATUS'] == 1
    return roster[on_roster][wanted_columns]

# Fetch the active-player roster once; this performs a live nba_api request.
active_players = get_active_players()

# Print the last few rows as a quick sanity check of the download.
print(active_players.tail())

# Save the roster (without the DataFrame index) to active_players.csv in the
# working directory; save_player_career_gamelogs is later run against this file.
active_players.to_csv('active_players.csv', index=False)


## Run this code snippet when building the complete career gamelogs for all active players

In [63]:
import os
import pandas as pd

def save_player_career_gamelogs(players_file, output_folder):
    """
    Writes one game-log CSV per player listed in players_file.

    A player is skipped when a CSV for them is already present in output_folder or
    when the fetch returns no rows. A failure for one player is printed and does not
    stop the remaining players from being processed.

    Parameters:
    - players_file (str): CSV with at least 'PERSON_ID' and 'DISPLAY_FIRST_LAST' columns.
    - output_folder (str): Directory for the per-player CSVs; created if missing.
    """
    roster = pd.read_csv(players_file)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for _, row in roster.iterrows():
        player_id, player_name = row['PERSON_ID'], row['DISPLAY_FIRST_LAST']

        # File name is the display name with spaces -> underscores and dots removed.
        safe_name = player_name.replace(' ', '_').replace('.', '')
        target_path = os.path.join(output_folder, safe_name + '.csv')

        # An existing file means this player was handled on an earlier run.
        if os.path.exists(target_path):
            continue

        try:
            gamelog = get_complete_career_gamelog(str(player_id))
            if gamelog.empty:
                print(f"No game log data found for {player_name}. Skipping...")
            else:
                gamelog.to_csv(target_path, index=False)
                print(f"Saved {player_name}'s career game log to {target_path}")
        except Exception as e:
            # Report and move on so one bad player does not abort the whole batch.
            print(f"Error processing {player_name} (ID: {player_id}): {e}")

# Build the per-player CSVs in ./players from the roster in active_players.csv.
# Players whose CSV already exists are skipped, so this is safe to re-run.
save_player_career_gamelogs('active_players.csv', 'players')

No game log data found for Jaylin Galloway. Skipping...


## The function below should be run to append recent games to the players' career gamelogs

In [None]:
import pandas as pd
import os

def _find_column(frame, name):
    """Return the column label of `frame` that equals `name` ignoring case; KeyError if absent."""
    wanted = name.upper()
    for column in frame.columns:
        if str(column).upper() == wanted:
            return column
    raise KeyError(f"column '{name}' not found")


def _game_id_keys(frame, column):
    """Return the game ids of `frame[column]` as strings without leading zeros, for comparison."""
    return frame[column].astype(str).str.strip().str.lstrip('0')


def _fetch_season_gamelog(player_id, season):
    """Download one player's game log for one season through nba_api's PlayerGameLog."""
    return playergamelog.PlayerGameLog(player_id=player_id, season=season).get_data_frames()[0]


def update_player_gamelogs_with_current_season(players_folder, current_season, fetch_gamelog=None):
    """
    Appends the current season's games to each player's game log file in the players folder.
    Games already present in a file are not added again, and duplicate rows already
    in the file are removed when it is rewritten.

    Parameters:
    - players_folder (str): The directory where the individual player game logs are stored.
    - current_season (str): The current NBA season in 'YYYY-YY' format (e.g., '2023-24').
    - fetch_gamelog (callable, optional): `fetch_gamelog(player_id, season) -> DataFrame`.
      Defaults to a single nba_api PlayerGameLog request for `current_season`. Supplying
      your own lets you plug in a cache or a rate-limited client.

    Notes:
    - Only `current_season` is requested, so any season string the API accepts works.
    - Game ids are compared as strings with leading zeros stripped. A CSV round trip
      through pandas turns a zero-padded id such as '0022300061' into the integer
      22300061, which would never equal the freshly fetched string.
    - The player-id and game-id columns are located case-insensitively, and fetched
      columns are renamed to the spelling used in the existing file, so both
      'GAME_ID' and 'Game_ID' style headers work.
    - A failure for one file is printed and the remaining files are still processed.
    """
    if fetch_gamelog is None:
        fetch_gamelog = _fetch_season_gamelog

    # Get the list of player files in the directory
    player_files = [f for f in os.listdir(players_folder) if f.endswith('.csv')]

    for player_file in player_files:
        player_file_path = os.path.join(players_folder, player_file)
        # Derived before the try block so the error message below always names the
        # file that actually failed.
        player_name = player_file.replace('.csv', '').replace('_', ' ')

        try:
            existing = pd.read_csv(player_file_path)

            # Every row of a player's file carries the same id; take it from the first row.
            player_id = str(existing[_find_column(existing, 'PLAYER_ID')].iloc[0])

            fetched = fetch_gamelog(player_id, current_season)
            if fetched.empty:
                print(f"No new games found for {player_name} in the current season.")
                continue

            # Rename fetched columns to the existing file's spelling so the concat
            # below lines the columns up instead of creating near-duplicate ones.
            existing_names = {str(col).upper(): col for col in existing.columns}
            new_games = fetched.rename(columns={
                col: existing_names[str(col).upper()]
                for col in fetched.columns
                if str(col).upper() in existing_names
            })
            new_games[existing_names.get('SEASON', 'SEASON')] = current_season

            game_col = _find_column(existing, 'GAME_ID')
            existing_keys = _game_id_keys(existing, game_col)
            new_keys = _game_id_keys(new_games, game_col)

            # Drop duplicates left in the file by earlier runs, and keep only
            # fetched games that the file does not contain yet.
            existing_unique = existing[~existing_keys.duplicated(keep='first')]
            fresh_games = new_games[~new_keys.isin(existing_keys) & ~new_keys.duplicated(keep='first')]

            if fresh_games.empty and len(existing_unique) == len(existing):
                print(f"No new games found for {player_name} in the current season.")
                continue

            combined = pd.concat([existing_unique, fresh_games], ignore_index=True)
            combined.to_csv(player_file_path, index=False)

            print(f"Updated {player_name}'s game log with current season games.")

        except Exception as e:
            # Best effort: report the problem and continue with the next player file.
            print(f"Error updating game log for {player_name}: {e}")

# Refresh every CSV in ./players with games from the 2023-24 season.
# Change the season string when a new season begins.
update_player_gamelogs_with_current_season(players_folder='players', current_season='2023-24')