In [35]:
import os
from datetime import datetime

import pandas as pd
import requests
from nba_api.stats.endpoints import boxscoreadvancedv3, boxscoretraditionalv3, scoreboardv2

In [36]:
# Dictionary to map TEAM_ID to TEAM_NAME.
# Keys are the team IDs written as strings, so callers in this notebook convert
# IDs with str() / .astype(str) before looking them up.
team_id_to_name = {
    '1610612737': 'Atlanta Hawks',
    '1610612738': 'Boston Celtics',
    '1610612739': 'Cleveland Cavaliers',
    '1610612740': 'New Orleans Pelicans',
    '1610612741': 'Chicago Bulls',
    '1610612742': 'Dallas Mavericks',
    '1610612743': 'Denver Nuggets',
    '1610612744': 'Golden State Warriors',
    '1610612745': 'Houston Rockets',
    '1610612746': 'LA Clippers',
    '1610612747': 'Los Angeles Lakers',
    '1610612748': 'Miami Heat',
    '1610612749': 'Milwaukee Bucks',
    '1610612750': 'Minnesota Timberwolves',
    '1610612751': 'Brooklyn Nets',
    '1610612752': 'New York Knicks',
    '1610612753': 'Orlando Magic',
    '1610612754': 'Indiana Pacers',
    '1610612755': 'Philadelphia 76ers',
    '1610612756': 'Phoenix Suns',
    '1610612757': 'Portland Trail Blazers',
    '1610612758': 'Sacramento Kings',
    '1610612759': 'San Antonio Spurs',
    '1610612760': 'Oklahoma City Thunder',
    '1610612761': 'Toronto Raptors',
    '1610612762': 'Utah Jazz',
    '1610612763': 'Memphis Grizzlies',
    '1610612764': 'Washington Wizards',
    '1610612765': 'Detroit Pistons',
    '1610612766': 'Charlotte Hornets'
}

In [37]:
# Fetches and combines significant traditional and advanced box score stats for a given NBA game.
# Parameters:
    # - game_id (str): The NBA game ID (e.g., '0022300123')
# Returns:
    # - DataFrame: A combined DataFrame with relevant stats for all players in the game

def collect_game_stats(game_id):
    """Fetch and combine traditional and advanced box score stats for one game.

    The first data frame returned by each endpoint is used (presumably the
    player-level table -- confirm against the nba_api documentation).

    Parameters:
        game_id (str): The NBA game ID (e.g., '0022300123').

    Returns:
        DataFrame: 'teamName' followed by the selected traditional columns and
        then the selected advanced columns, one row per 'personId' present in
        both box scores (inner merge). 'teamName' is NaN for team IDs that are
        missing from team_id_to_name.
    """
    trad_cols = ['personId', 'nameI', 'teamId', 'minutes', 'fieldGoalsMade', 'fieldGoalsAttempted', 'threePointersMade', 'threePointersAttempted', 'freeThrowsMade', 'freeThrowsAttempted', 'reboundsTotal', 'assists', 'blocks', 'turnovers', 'foulsPersonal', 'points']
    adv_cols = ['personId', 'offensiveRating', 'defensiveRating', 'pace', 'trueShootingPercentage', 'usagePercentage']

    # Fetch traditional box score data
    boxscore_trad = boxscoretraditionalv3.BoxScoreTraditionalV3(game_id=game_id)
    trad_df = boxscore_trad.get_data_frames()[0]

    # Fetch advanced box score data
    boxscore_adv = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id=game_id)
    adv_df = boxscore_adv.get_data_frames()[0]

    # Work on an explicit copy: adding a column to a bare column-subset
    # selection triggers pandas' SettingWithCopyWarning.
    trad_stats = trad_df[trad_cols].copy()
    adv_stats = adv_df[adv_cols]

    # Map team names using team_id_to_name dictionary (keys are strings)
    trad_stats['teamName'] = trad_stats['teamId'].astype(str).map(team_id_to_name)

    # Reorder columns to have 'teamName' before 'personId'
    trad_stats = trad_stats[['teamName'] + trad_cols]

    # Merge traditional and advanced stats on personId
    return pd.merge(trad_stats, adv_stats, on='personId')

# # Example usage
# game_id = '0022000017'  # Replace with an actual game ID
# game_stats = collect_game_stats(game_id).head()
# print(game_stats)


In [59]:
def collect_full_game_info(game_id, game_date, home_team_id, away_team_id):
    """Build a one-row summary (teams, scores, player IDs) for a single game.

    Only the traditional box score is requested; every field produced here is
    derived from it, so no advanced box score call is made.

    Parameters:
        game_id (str): The NBA game ID.
        game_date: Stored unchanged in the 'game_date' column.
        home_team_id (str | int): Home team ID; compared to 'teamId' as an int
            and looked up in team_id_to_name as a string.
        away_team_id (str | int): Away team ID, handled like home_team_id.

    Returns:
        DataFrame: A single row with game_id, game_date, team names and IDs,
        per-team scores (sum of the 'points' column for that team), total
        points, and the list of 'personId' values for each team. When the box
        score frame has no rows, an empty DataFrame with the same columns is
        returned so callers can detect "no data" via `.empty`.
    """
    columns = [
        'game_id', 'game_date',
        'home_team_name', 'home_team_id',
        'away_team_name', 'away_team_id',
        'home_team_score', 'away_team_score', 'total_points',
        'home_team_players', 'away_team_players',
    ]

    # Fetch traditional box score data
    boxscore_trad = boxscoretraditionalv3.BoxScoreTraditionalV3(game_id=game_id)
    trad_df = boxscore_trad.get_data_frames()[0]

    # Nothing to summarise: report that explicitly instead of fabricating a
    # 0-0 game with empty rosters.
    if trad_df.empty:
        return pd.DataFrame(columns=columns)

    # Filter once per team and reuse the rows for both scores and rosters
    home_rows = trad_df[trad_df['teamId'] == int(home_team_id)]
    away_rows = trad_df[trad_df['teamId'] == int(away_team_id)]

    home_team_score = home_rows['points'].sum()
    away_team_score = away_rows['points'].sum()

    # Compile all game information into a single row dictionary
    game_info = {
        'game_id': game_id,
        'game_date': game_date,
        'home_team_name': team_id_to_name.get(str(home_team_id), "Unknown"),
        'home_team_id': home_team_id,
        'away_team_name': team_id_to_name.get(str(away_team_id), "Unknown"),
        'away_team_id': away_team_id,
        'home_team_score': home_team_score,
        'away_team_score': away_team_score,
        'total_points': home_team_score + away_team_score,
        'home_team_players': home_rows['personId'].tolist(),
        'away_team_players': away_rows['personId'].tolist(),
    }

    return pd.DataFrame([game_info], columns=columns)

# # Example usage
# game_info_df = collect_full_game_info('0022000015', '2020-12-23', '1610612737', '1610612741')
# print(game_info_df)

# Saves basic gamelogs

In [57]:
# Function to save basic game stats to a CSV file
def save_basic_game_to_csv(game_id, game_date, home_team_id, away_team_id):
    """Write the summary for one game to nba_gamelogs/game_<game_id>.csv.

    Parameters:
        game_id (str): The NBA game ID; also used in the output file name.
        game_date: Passed through to collect_full_game_info.
        home_team_id: Passed through to collect_full_game_info.
        away_team_id: Passed through to collect_full_game_info.

    Returns:
        None. If collect_full_game_info returns an empty frame, nothing is
        written and a message is printed instead.
    """
    game_df = collect_full_game_info(game_id, game_date, home_team_id, away_team_id)
    if game_df.empty:
        print(f"No data exported for game {game_id}")
        return

    file_name = f'nba_gamelogs/game_{game_id}.csv'
    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before writing (a fresh checkout has none).
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    game_df.to_csv(file_name, index=False)

# Function to process games in specific sections
def process_game_sections(start_idx, end_idx, games_file='nba_REGULARSEASON_ONLY_game_pks.csv'):
    """Export basic game info for a positional slice of the games CSV.

    NOTE: a later cell in this notebook defines another function with this
    same name; once that cell has run, this definition is shadowed.

    Parameters:
        start_idx (int): Position of the first row to process.
        end_idx (int): Position one past the last row to process.
        games_file (str): CSV with GAME_ID, GAME_DATE, HOME_ID and AWAY_ID
            columns; every column is read as a string.
    """
    # Every column is loaded as text, then the requested window is taken by position
    schedule = pd.read_csv(games_file, dtype=str)
    window = schedule.iloc[start_idx:end_idx]

    print(f"Processing games from index {start_idx} to {end_idx}...")

    # One export per row, in file order
    for record in window.to_dict('records'):
        save_basic_game_to_csv(
            record['GAME_ID'],
            record['GAME_DATE'],
            record['HOME_ID'],
            record['AWAY_ID'],
        )

# Manually specify the section you want to process.
# Both values are handed to process_game_sections, which slices the games CSV
# by position with .iloc[start_idx:end_idx].
start_index = 0  # Start from this index
end_index = 10  # End at this index (not inclusive)

# Runs the export as soon as this cell executes
process_game_sections(start_index, end_index)

Processing games from index 0 to 10...


## Saves game stats for each gamelog

In [None]:
# Function to save game stats to a CSV file
def save_game_to_csv(game_id):
    """Write the combined player stats for one game to a CSV file.

    The output path is nba_gamelogs/gameStats_<game_id>.csv.

    Parameters:
        game_id (str): The NBA game ID; also used in the output file name.

    Returns:
        None. If collect_game_stats returns an empty frame, nothing is written
        and a message is printed instead.
    """
    game_df = collect_game_stats(game_id)
    if game_df.empty:
        print(f"No data exported for game {game_id}")
        return

    file_name = f'nba_gamelogs/gameStats_{game_id}.csv'
    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before writing (a fresh checkout has none).
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    game_df.to_csv(file_name, index=False)

# Function to process games in specific sections
def process_game_sections(start_idx, end_idx, games_file='nba_REGULARSEASON_ONLY_game_pks.csv'):
    """Export player stats for a positional slice of the game IDs in a CSV.

    Parameters:
        start_idx (int): Position of the first game ID to process.
        end_idx (int): Position one past the last game ID to process.
        games_file (str): CSV containing a GAME_ID column; every column is
            read as a string.
    """
    # Only the GAME_ID column is needed here
    id_column = pd.read_csv(games_file, dtype=str).GAME_ID
    wanted_ids = id_column.iloc[start_idx:end_idx]

    print(f"Processing games from index {start_idx} to {end_idx}...")

    # One export per game ID, in file order
    for current_id in wanted_ids.tolist():
        save_game_to_csv(current_id)

# Manually specify the section you want to process
# Modify these indices to process different sections of games.
# Both values are handed to process_game_sections, which slices the GAME_ID
# column by position with .iloc[start_idx:end_idx].
start_index = 0  # Start from this index
end_index = 10  # End at this index (not inclusive)

# Runs the export as soon as this cell executes
process_game_sections(start_index, end_index)