In [2]:
import streamlit as st
from streamlit_option_menu import option_menu
import pandas as pd
import numpy as np
import os
import random
from nba_stats_predictor import NBAStatsPredictor
import time
import requests
from io import StringIO





In [2]:


def scrape_player_game_logs(player_id, season="2025"):
    """
    Scrapes a player's NBA season game logs from Basketball Reference.
    
    Args:
        player_id (str): Player ID from Basketball Reference (e.g., 'jamesle01' or 'j/jamesle01')
        season (str): Season year (e.g., '2024' for 2023-2024 season)
    """
    # Check if player_id already contains the first letter path
    if '/' in player_id:
        # Player ID already includes the letter path (e.g., 'j/jamesle01')
        url = f"https://www.basketball-reference.com/players/{player_id}/gamelog/{season}"
    else:
        # Player ID is just the ID part (e.g., 'jamesle01')
        url = f"https://www.basketball-reference.com/players/{player_id[0]}/{player_id}/gamelog/{season}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    print(f"Sending request to Basketball Reference for player {player_id}...")
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            print(f"Request successful! Status code: {response.status_code}")
            html_io = StringIO(response.text)
            print("Parsing tables with pandas...")
            try:
                tables = pd.read_html(html_io, attrs={'id': 'pgl_basic'})
                if tables and len(tables) > 0:
                    print("Found game log table by ID!")
                    game_log_df = tables[0]
                    processed_df = process_dataframe(game_log_df)
                    processed_df['player_id'] = player_id  # Add player_id column
                    return processed_df
            except Exception as e:
                print(f"Couldn't find table by ID: {e}")
                html_io.seek(0)
                tables = pd.read_html(html_io)
                tables_sorted = sorted(tables, key=lambda x: len(x), reverse=True)
                if tables_sorted:
                    game_log_df = tables_sorted[0]
                    print(f"Table found! Dimensions: {game_log_df.shape}")
                    processed_df = process_dataframe(game_log_df)
                    processed_df['player_id'] = player_id  # Add player_id column
                    return processed_df
                else:
                    print("No tables found on the page.")
                    return None
        else:
            print(f"Request failed with status code: {response.status_code}")
            return None
    except Exception as e:
        print(f"Error during scraping: {e}")
        return None

def process_dataframe(game_log_df):
    """
    Process the raw dataframe from Basketball Reference to clean and format it properly.
    """
    print("Processing the game log dataframe...")
    # Print the first few rows to help with debugging
    print("First few rows of raw data:")
    print(game_log_df.head(3))
    
    if isinstance(game_log_df.columns, pd.MultiIndex):
        # Handle multi-level columns
        game_log_df.columns = [' '.join(str(col) for col in cols if str(col) != 'Unnamed: 0_level_0').strip() 
                             for cols in game_log_df.columns.values]
    
    print("Raw columns:", game_log_df.columns.tolist())
    
    # Remove header rows (where Rk column appears again in the data)
    game_log_df = game_log_df[~game_log_df.iloc[:, 0].astype(str).str.contains("Rk")]
    
    # Remove unnamed columns
    unnamed_cols = [col for col in game_log_df.columns if 'Unnamed' in str(col)]
    if unnamed_cols:
        game_log_df = game_log_df.drop(columns=unnamed_cols)
    
    # Handle team column naming
    if 'Tm' in game_log_df.columns and 'Team' not in game_log_df.columns:
        game_log_df = game_log_df.rename(columns={'Tm': 'Team'})
    elif 'Tm' in game_log_df.columns and 'Team' in game_log_df.columns and game_log_df['Team'].isna().all():
        game_log_df['Team'] = game_log_df['Tm']
        game_log_df = game_log_df.drop(columns=['Tm'])
    
    # Fill missing team values
    if 'Team' in game_log_df.columns and game_log_df['Team'].isna().any():
        game_log_df['Team'] = game_log_df['Team'].fillna('Unknown')
    elif 'Team' not in game_log_df.columns and 'Tm' not in game_log_df.columns:
        game_log_df['Team'] = 'Unknown'
    
    # Check for and handle the MP (Minutes Played) column
    if 'MP' in game_log_df.columns:
        
        game_log_df['MP'] = game_log_df['MP'].apply(lambda x: convert_minutes_format(x) if pd.notna(x) else x)
    elif 'MIN' in game_log_df.columns:
        game_log_df = game_log_df.rename(columns={'MIN': 'MP'})
        game_log_df['MP'] = game_log_df['MP'].apply(lambda x: convert_minutes_format(x) if pd.notna(x) else x)
    
    # Convert all possible numeric columns
    for col in game_log_df.columns:
        if col not in ['Date', 'Tm', 'Team', 'Opp', 'Result', 'GS', 'player_id']:
            game_log_df[col] = pd.to_numeric(game_log_df[col], errors='coerce')
    
    # Additional check for MP column
    if 'MP' not in game_log_df.columns:
        print("MP column not found in the data. Available columns:", game_log_df.columns.tolist())
        game_log_df['MP'] = None  # Create the column with None values if it doesn't exist
        
        
        minute_column_variants = ['Minutes', 'Min', 'Mins', 'Minutes Played']
        for col_name in minute_column_variants:
            if col_name in game_log_df.columns:
                game_log_df['MP'] = game_log_df[col_name]
                game_log_df['MP'] = game_log_df['MP'].apply(lambda x: convert_minutes_format(x) if pd.notna(x) else x)
                print(f"Found minutes data in column: {col_name}")
                break
    
    expected_columns = ['Date', 'Team', 'Opp', 'Result', 'MP']
    missing_cols = [col for col in expected_columns if col not in game_log_df.columns]
    if missing_cols:
        print(f"Warning: Missing expected columns: {missing_cols}")
    
    return game_log_df

def convert_minutes_format(minutes_str):
    """
    Convert minutes from 'MM:SS' string format to decimal minutes.
    Example: '36:12' becomes 36.2 (36 minutes and 12 seconds)
    """
    try:
        if isinstance(minutes_str, (int, float)):
            return float(minutes_str)
        elif isinstance(minutes_str, str):
            if ':' in minutes_str:
                parts = minutes_str.split(':')
                if len(parts) == 2:
                    minutes = int(parts[0])
                    seconds = int(parts[1])
                    return minutes + seconds/60
            else:
                return float(minutes_str)
        return None
    except Exception as e:
        print(f"Error converting minutes format: {e}, value was: {minutes_str}")
        return None

def save_to_csv(df, filename='nba_player_game_logs.csv'):
    """
    Saves the DataFrame to a CSV file.
    """
    if df is not None:
        try:
            df.to_csv(filename, index=False)
            print(f"Data saved to {filename}")
            return True
        except Exception as e:
            print(f"Error saving CSV: {e}")
            return False
    else:
        print("No data to save.")
        return False

def main():
    """
    Main function to execute the scraping workflow for multiple players.
    """
    # List of player IDs to scrape
    player_ids = [
        "j/jamesle01", "c/curryst01", "d/duranke01", "a/antetgi01", "d/doncilu01", 
        "j/jokicni01", "e/embiijo01", "t/tatumja01", "b/butleji01", "l/leonaka01", 
        "l/lillada01", "h/hardeja01", "d/davisan02", "b/bookede01", "m/mitchdo01", 
        "w/willizi01", "m/moranja01", "y/youngtr01", "t/townska01", "b/bealbr01", 
        "g/georgpa01", "i/irvinky01", "p/paulch01", "d/derozde01", "w/westbru01", 
        "a/adebaba01", "h/holidjr01", "m/middlkh01", "s/siakapa01", "v/vanvlfr01", 
        "g/gilgesh01", "i/ingrabr01", "m/mccolcj01", "b/ballla01", "h/halibty01", 
        "r/randlju01", "b/barrerj01", "f/foxde01", "s/sabondo01", "t/turnemy01", 
        "p/portemi01", "m/murraja01", "w/wiggian01", "g/greenra01", "v/vucicni01", 
        "m/mobleev01", "s/smithja02", "b/barnesc01", "b/banchpa01", "s/suggsca01"
    ]
    
    season = "2025"  
    
    print(f"Starting NBA game log scraper for {len(player_ids)} players...")
    start_time = time.time()
    
    all_game_logs = []
    
    
    checkpoint_interval = 5  # Save after every 5 players
    
    for i, player_id in enumerate(player_ids):
        print(f"\nScraping data for player {i+1}/{len(player_ids)}: {player_id}")
        player_game_logs = scrape_player_game_logs(player_id, season)
        
        if player_game_logs is not None:
            all_game_logs.append(player_game_logs)
            print(f"Successfully scraped {len(player_game_logs)} games for {player_id}")
            
            # Save checkpoint after every few players
            if (i + 1) % checkpoint_interval == 0 and all_game_logs:
                checkpoint_df = pd.concat(all_game_logs, ignore_index=True)
                checkpoint_filename = f"checkpoint_nba_logs_{i+1}_players.csv"
                save_to_csv(checkpoint_df, checkpoint_filename)
                print(f"Checkpoint saved to {checkpoint_filename} after {i+1} players")
            
            # Add a significant delay between players to avoid rate limiting
            if i < len(player_ids) - 1:  
                wait_time = 10 + random.randint(5, 15)  
                print(f"Waiting {wait_time} seconds before next player to avoid rate limiting...")
                time.sleep(wait_time)
        else:
            print(f"Failed to scrape data for {player_id}")
            
            if i < len(player_ids) - 1:
                wait_time = 15 + random.randint(5, 15)
                print(f"Waiting {wait_time} seconds before next player...")
                time.sleep(wait_time)
    
    # Combine all player data into a single DataFrame
    if all_game_logs:
        combined_df = pd.concat(all_game_logs, ignore_index=True)
        print(f"\nCombined dataset created with {len(combined_df)} total game logs")
        
        # Save combined data
        save_to_csv(combined_df, f"nba_game_logs_{season}.csv")
        
        # Print sample and statistics
        print("\nSample of the scraped data:")
        print(combined_df.head())
        
        print("\nBasic statistics:")
        print(f"Total games: {len(combined_df)}")
        print(f"Players included: {combined_df['player_id'].nunique()}")
        print("\nAvailable columns:")
        print(combined_df.columns.tolist())
        
        # Save intermediate results after every 10 players
        print("\nTotal time elapsed:", time.time() - start_time)
    else:
        print("Scraping failed for all players.")


In [3]:


def load_existing_data(filename='nba_game_logs_2025.csv'):
    """
    Load existing dataset if available, return empty DataFrame if file doesn't exist.
    """
    try:
        if os.path.exists(filename):
            df = pd.read_csv(filename)
            print(f"Loaded existing dataset with {len(df)} records from {filename}")
            return df
        else:
            print(f"File {filename} not found. Will create a new dataset.")
            return None
    except Exception as e:
        print(f"Error loading existing data: {e}")
        return None

def scrape_additional_players(new_player_ids, existing_filename='nba_game_logs_2025.csv', season="2025"):
    """
    Scrapes game logs for new players and combines with existing data.
    """
    # Load existing data
    existing_df = load_existing_data(existing_filename)
    
    # Get list of players already in the dataset to avoid duplicates
    existing_players = set()
    if existing_df is not None and 'player_id' in existing_df.columns:
        existing_players = set(existing_df['player_id'].unique())
        print(f"Existing dataset contains {len(existing_players)} players")
    
    # Filter out players that are already in the dataset
    players_to_scrape = [p_id for p_id in new_player_ids if p_id not in existing_players]
    
    if not players_to_scrape:
        print("All players in the new list are already in the dataset. Nothing to scrape.")
        return existing_df
    
    print(f"Scraping data for {len(players_to_scrape)} new players...")
    
    # Scrape the new players
    start_time = time.time()
    new_game_logs = []
    checkpoint_interval = 5
    
    for i, player_id in enumerate(players_to_scrape):
        print(f"\nScraping data for new player {i+1}/{len(players_to_scrape)}: {player_id}")
        player_game_logs = scrape_player_game_logs(player_id, season)
        
        if player_game_logs is not None:
            new_game_logs.append(player_game_logs)
            print(f"Successfully scraped {len(player_game_logs)} games for {player_id}")
            
            # Save checkpoint after every few players
            if (i + 1) % checkpoint_interval == 0 and new_game_logs:
                checkpoint_df = pd.concat(new_game_logs, ignore_index=True)
                checkpoint_filename = f"new_checkpoint_nba_logs_{i+1}_players.csv"
                save_to_csv(checkpoint_df, checkpoint_filename)
                print(f"Checkpoint saved to {checkpoint_filename} after {i+1} new players")
            
            # No waiting time between players
            print("Continuing to next player immediately...")
        else:
            print(f"Failed to scrape data for {player_id}")
            
            if i < len(players_to_scrape) - 1:
                print("Continuing to next player...")
    
    # Combine all new player data
    if new_game_logs:
        new_combined_df = pd.concat(new_game_logs, ignore_index=True)
        print(f"\nNew dataset created with {len(new_combined_df)} total game logs")
        
        # Merge with existing data if it exists
        if existing_df is not None:
            # Combine new data with existing data
            final_df = pd.concat([existing_df, new_combined_df], ignore_index=True)
            print(f"Combined dataset now has {len(final_df)} total game logs from {final_df['player_id'].nunique()} players")
        else:
            final_df = new_combined_df
            print(f"No existing data to merge. New dataset has {len(final_df)} game logs")
        
        # Save combined data
        save_to_csv(final_df, existing_filename)
        
        # Print sample and statistics
        print("\nSample of the updated dataset:")
        print(final_df.head())
        
        print("\nBasic statistics:")
        print(f"Total games: {len(final_df)}")
        print(f"Players included: {final_df['player_id'].nunique()}")
        print("Total scraping time:", time.time() - start_time)
        
        return final_df
    else:
        print("Scraping failed for all new players.")
        return existing_df

def main_additional():
    """
    Main function to add new players to the existing dataset.
    """
    # Specify the new list of player IDs to scrape
    new_player_ids = [
        # Add your new list of player IDs here
        "e/edwaran01", "g/giddesh01", "h/hendeco01", "m/maxeyty01", "m/murraja01", 
        "h/holmgri01", "b/brogdma01", "a/aytonde01", "j/johnsja05", "r/reeveau01",
        "b/brownja02", "b/banchpa01", "w/wagnefr01", "s/sengaal01", "t/thomptr01",
        "p/porzikr01", "m/murrays01", "b/brunjan01", "h/hartjo01", "a/aldrila01",
        "r/russeda01", "h/hayesjk01", "w/washinpj01", "c/claxcni01", "d/davisan03",
        "g/gainesd01", "m/millspa01", "l/lowryky01", "w/whitede01", "c/claxtca01",
        "g/goberru01", "a/allenja01", "p/poweljn01", "b/brissoo01", "p/poeleja01"
    ]
    
    existing_filename = "nba_game_logs_2025.csv"  # The name of your existing dataset
    season = "2025"
    
    # Run the scraping and data aggregation
    final_dataset = scrape_additional_players(new_player_ids, existing_filename, season)
    
    if final_dataset is not None:
        print("\nData aggregation complete! Updated dataset is saved to", existing_filename)
    else:
        print("\nFailed to update the dataset.")



In [82]:
def main_additional():
    """
    Main function to add new players to the existing dataset.
    """
    # Specify the new list of player IDs to scrape
    new_player_ids = ["h/holidaa01", "e/easonta01", "s/strawju01", "w/wiggiaa01", "d/dortlu01",
                    "w/willike04","t/thomaca01", "c/claxtni01", "d/dennijo01", "s/smithde03", "h/harrijo01", "o/onealro01", "s/sharpda01", "m/millspa01", "w/watanyu01", "s/sumneed01", "c/curryse01", "e/edwarka01", "t/thomani01", "w/williro04", "d/davidjd01", "f/fosterj01",
                      "l/lavizan01", "d/debromo01", "v/vucevni01", "w/willipa01", "c/carusal01", "d/dosunao01", "j/jonesde01", "d/dalecda01", "b/bradleto01", "g/greenja01", "s/simonju01", "t/terryda01", "w/whiteco01", "b/browntr01", "h/hillma01", "m/mccoysc01", "f/fosterj01",
                      "s/siakapa01", "v/vanvlfr01", "a/anunoog01", "b/barnesc01", "t/trenth01", "a/achiupr01", "b/boucher01", "y/youngth01", "f/flynnma01", "b/bantoda01", "k/kolocha01", "d/dowtije01", "h/harpejr01", "p/porterj01", "c/champju01", "j/johnsda01", "m/makuksv01",
                      "y/youngtr01", "m/murraju01", "c/collijo01", "h/hunterd01", "c/capela01", "b/bogdabo01", "g/griffaj01", "o/okongon01", "j/johnsja01", "m/matthega02", "f/fernabr01", "k/krejcvi01", "b/beysa01", "h/holidju01", "m/martike01", "f/forbema01",
                      "b/butleji01", "a/adebaba01", "h/herroty01", "l/lowryky01", "s/strusma01", "m/marti01", "r/robindu01", "v/vincega01", "o/oladivi01", "y/yurtuse01", "d/dedmode01", "h/highmja01", "j/jovicni01", "c/cainja01", "g/garrema01", "h/hasleud01",
                      "b/banchpa01", "w/wagnefr01", "s/suggsja01", "c/cartewe01", "f/fultzma01", "h/harriga01", "o/okekech01", "b/bollbo01", "h/houstca01", "m/mcgrima01", "s/schofad01", "r/rosste01", "a/anthoco01", "m/morriis01", "g/gilesha01", "h/harrisga01",
                      "b/bealbr01", "p/porzikr01", "k/kuzmaky01", "m/morrimo01", "w/wrightd01", "a/avdijde01", "g/gaffoda01", "d/davisjo01", "g/goodwjo01", "t/todda01", "c/careyve01", "b/bartowi01", "h/huffja01", "g/gillan01", "n/nunnke01", "s/smithis01",
                      "d/doncilu01", "i/irvinky01", "h/hardati02", "p/poweldw01", "g/greenda01", "w/woodch01", "b/bullore01", "m/mcgeeja01", "h/harpero01", "w/wrighfr01", "h/harremo01", "h/harperj01", "f/finnedo01", "n/noweljo01", "h/holidju01",
                      "j/jokicni01", "m/murrama01", "g/gordoaa01", "p/portemi01", "k/kanca01", "b/brownbr01", "g/greenje02", "s/smithis01", "n/najiz01", "w/watsopj01", "b/braunch01", "r/reedda01", "j/jackstr01", "g/gillesh01"
                      "c/curryst01", "t/thompkl01", "g/greendr01", "w/wiggian01", "p/poolejo01", "l/looneke01", "d/divin01", "j/jacksja01", "k/kuminga01", "m/moodymo01", "j/jamesle01", "r/rolliry01", "g/greenja01",
                      "g/greenja01", "p/porteke02", "s/sengu01", "e/easonta01", "j/jabarsm01", "m/martike01", "n/nixda01", "w/washity01", "g/garreke01", "m/matthega02", "k/kaminsk01", "d/daysda01", "f/fosterj01",
                      "l/leonaka01", "g/georgepa01", "p/powelno01", "z/zubaciv01", "m/morrist01", "j/jacksre01", "b/batumni01", "c/coffeam01", "m/mannte01", "w/walljo01", "b/bostobr01", "p/prestja01", "d/diabamo01",
                      "j/jamesle01", "d/davisan02", "r/russeda01", "b/beaslma01", "v/vandeja01", "s/schrode01", "r/reavesa01", "w/walkelo01", "h/hachimr01", "b/browntr01", "g/gabriwe01", "c/christm01", "p/pippesc01",
                      "m/moranja01", "b/baneja01", "j/jacksja01", "a/adamsst01", "t/tillmxa01", "j/jonesty01", "c/clarkbr01", "k/konchjo01", "a/aldam01", "l/laravja01", "w/williza01", "r/rodchda01", "k/kennalu01",
                      "t/townska01", "e/edwaran01", "g/goberru01", "a/andersa01", "m/mcdanja02", "n/nazrere01", "j/jordan01", "f/forbema01", "r/russeda01", "n/nowelja01", "k/knightna01", "m/moorewe01",
                      "w/willizi01", "i/ingrabr01", "m/mccolcj01", "v/valanjo01", "h/hayesja02", "a/alvarjo01", "j/joneshe01", "m/murphytr01", "d/daniels01", "n/nancela02", "l/lewiske01", "t/tempeg01", "m/marshja01",
                      "g/gilgeal01", "g/giddejo01", "d/dortlu01", "w/willija07", "j/joelma01", "s/saricda01", "w/wallaja01", "j/jaylijo01", "m/manntr01", "w/watsoke01", "j/jeromty01", "d/diengo01",
                      "b/bookede01", "d/duranke01", "p/paulch01", "a/aytonde01", "o/okogijo01", "l/landada01", "s/shamet01", "b/biyombi01", "w/warrati01", "r/rosste01", "l/leeja01", "w/wainrda01",
                      "l/lillada01", "s/sharppo01", "g/grantje01", "n/nurkiju01", "s/simona01", "e/eubanks01", "t/thybuma01", "w/watfutr01", "k/knoxke01", "c/camero01", "j/johnske04", "m/mayssk01",
                      "f/foxde01", "s/sabondo01", "h/huertra01", "b/barnesh01", "m/monkma01", "m/mitcdda01", "l/lenal01", "l/lylestr01", "e/edwarke01", "d/dellama01", "o/okpalk01", "m/metuch01",
                      "v/victowi01", "j/jonestr01", "k/keldoke01", "s/sochaja01", "c/colliza01", "b/bassike01", "m/mcderdo01", "g/grahamd01", "b/batesem01", "l/langfro01", "d/diengo01", "b/barlowd01",
                      "m/markkla01", "c/clarkjo01", "s/sextoca01", "o/olenyni01", "k/kesslwa01", "b/beaslma01", "t/tuckepa01", "a/agbadoc01", "v/vanderj01", "g/gayru01", "f/fontega01", "a/azubuud01",
                    "e/edeyza01","c/castlst01"

                      ]

    existing_filename = "nba_game_logs_2025.csv"  # The name of your existing dataset
    season = "2025"
    
    # Run the scraping and data aggregation
    final_dataset = scrape_additional_players(new_player_ids, existing_filename, season)
    
    if final_dataset is not None:
        print("\nData aggregation complete! Updated dataset is saved to", existing_filename)
    else:
        print("\nFailed to update the dataset.")

if __name__ == '__main__':
    # Use main_additional() instead of main() to scrape additional players
    main_additional()


Loaded existing dataset with 22665 records from nba_game_logs_2025.csv
Existing dataset contains 280 players
Scraping data for 214 new players...

Scraping data for new player 1/214: h/holidaa01
Sending request to Basketball Reference for player h/holidaa01...
Request successful! Status code: 200
Parsing tables with pandas...
Couldn't find table by ID: No tables found
Table found! Dimensions: (89, 34)
Processing the game log dataframe...
First few rows of raw data:
    Rk Gcar Gtm        Date Team Unnamed: 5  Opp      Result            GS  \
0  NaN  NaN   1  2024-10-23  HOU        NaN  CHO  L, 105-110  Did Not Play   
1    1  387   2  2024-10-25  HOU        NaN  MEM  W, 128-108             0   
2    1  NaN   3  2024-10-26  HOU          @  SAS  L, 106-109  Did Not Play   

             MP  ...           DRB           TRB           AST           STL  \
0  Did Not Play  ...  Did Not Play  Did Not Play  Did Not Play  Did Not Play   
1         02:13  ...             0             0         

In [3]:
df=pd.read_csv("nba_game_logs_2025.csv")
df

Unnamed: 0,Rk,Gcar,Gtm,Date,Team,Opp,Result,GS,MP,FG,...,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-,player_id
0,1.0,1493.0,1.0,2024-10-22,LAL,MIN,"W, 110-103",*,34.650000,7.0,...,5.0,4.0,0.0,2.0,2.0,3.0,16.0,10.1,-6.0,j/jamesle01
1,2.0,1494.0,2.0,2024-10-25,LAL,PHO,"W, 123-116",*,34.700000,7.0,...,4.0,8.0,0.0,0.0,2.0,1.0,21.0,17.9,14.0,j/jamesle01
2,3.0,1495.0,3.0,2024-10-26,LAL,SAC,"W, 131-127",*,33.766667,12.0,...,14.0,10.0,0.0,1.0,5.0,3.0,32.0,27.1,13.0,j/jamesle01
3,4.0,1496.0,4.0,2024-10-28,LAL,PHO,"L, 105-109",*,35.800000,3.0,...,5.0,8.0,1.0,0.0,2.0,1.0,11.0,6.9,-17.0,j/jamesle01
4,5.0,1497.0,5.0,2024-10-30,LAL,CLE,"L, 110-134",*,28.966667,9.0,...,6.0,3.0,0.0,0.0,6.0,2.0,26.0,18.0,-17.0,j/jamesle01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23407,67.0,549.0,79.0,2025-04-08,CHO,MEM,"L, 100-124",0,19.250000,3.0,...,0.0,1.0,0.0,0.0,1.0,0.0,9.0,5.3,-10.0,c/curryse01
23408,67.0,,80.0,2025-04-09,CHO,TOR,"L, 96-126",Did Not Play,,,...,,,,,,,,,,c/curryse01
23409,68.0,550.0,81.0,2025-04-11,CHO,BOS,"L, 94-130",0,18.600000,6.0,...,2.0,0.0,1.0,0.0,1.0,1.0,17.0,14.0,-7.0,c/curryse01
23410,68.0,,82.0,2025-04-13,CHO,BOS,"L, 86-93",Did Not Play,,,...,,,,,,,,,,c/curryse01


In [42]:
df["Team"].unique()

array(['LAL', 'Unknown', 'GSW', 'PHO', 'MIL', 'DAL', 'DEN', 'PHI', 'BOS',
       'MIA', 'LAC', 'CLE', 'NOP', 'MEM', 'ATL', 'NYK', 'SAS', 'SAC',
       'WAS', 'IND', 'HOU', 'OKC', 'MIN', 'POR', 'ORL', 'BRK', 'CHI',
       'UTA', 'DET', 'TOR', 'CHO', 'Team'], dtype=object)

In [84]:
unique_players_per_team = df.groupby("Team")["player_id"].nunique()

unique_players_per_team


Team
ATL         10
BOS         16
BRK         12
CHI         11
CHO         22
CLE         11
DAL         13
DEN         10
DET         12
GSW         10
HOU         10
IND         15
LAC         11
LAL          9
MEM         10
MIA         10
MIL          9
MIN          9
NOP          9
NYK          9
OKC         11
ORL          9
PHI          9
PHO         11
POR          9
SAC         10
SAS         10
TOR         14
Team         1
UTA          9
Unknown    288
WAS          9
Name: player_id, dtype: int64

In [4]:
df["player_id"].nunique()

289

In [80]:
e=df[df["Team"]=="OKC"]
e["player_id"].unique()

array(['g/gilgesh01', 'h/holmgch01', 'c/carusal01', 'm/mitchaj01',
       'w/willija07', 'h/harteis01', 'w/willija06', 'j/joeis01'],
      dtype=object)

In [4]:
player_name_map = {
    "j/jamesle01": "LeBron James",
    "c/curryst01": "Stephen Curry",
    "d/duranke01": "Kevin Durant",
    "a/antetgi01": "Giannis Antetokounmpo",
    "d/doncilu01": "Luka Dončić",
    "j/jokicni01": "Nikola Jokić",
    "e/embiijo01": "Joel Embiid",
    "t/tatumja01": "Jayson Tatum",
    "b/butleji01": "Jimmy Butler",
    "l/leonaka01": "Kawhi Leonard",
    "l/lillada01": "Damian Lillard",
    "h/hardeja01": "James Harden",
    "d/davisan02": "Anthony Davis",
    "b/bookede01": "Devin Booker",
    
    "m/mitchdo01": "Donovan Mitchell",
    "w/willizi01": "Zion Williamson",
    "m/moranja01": "Ja Morant",
    "y/youngtr01": "Trae Young",
    "t/townska01": "Karl-Anthony Towns",
    "b/bealbr01": "Bradley Beal",
    "g/georgpa01": "Paul George",
    "i/irvinky01": "Kyrie Irving",
    "p/paulch01": "Chris Paul",
    "d/derozde01": "DeMar DeRozan",
    "w/westbru01": "Russell Westbrook",
    "a/adebaba01": "Bam Adebayo",
    "h/holidjr01": "Jrue Holiday",
    "m/middlkh01": "Khris Middleton",
    
    "s/siakapa01": "Pascal Siakam",
    "v/vanvlfr01": "Fred VanVleet",
    "g/gilgesh01": "Shai Gilgeous-Alexander",
    "e/edwaran01": "Anthony Edwards",
    "m/maxeyty01": "Tyrese Maxey",
    "m/murraja01": "Jamal Murray",
    "b/brogdma01": "Malcolm Brogdon",
    "a/aytonde01": "Deandre Ayton",
    "j/johnsja05": "Jalen Johnson",
    "b/brownja02": "Jaylen Brown",
    "b/banchpa01": "Paolo Banchero",
    "w/wagnefr01": "Franz Wagner",
    "t/thomptr01": "Tristan Thompson",
    "p/porzikr01": "Kristaps Porziņģis",
    
    "h/hartjo01": "Josh Hart",
    "r/russeda01": "D'Angelo Russell",
    "l/lowryky01": "Kyle Lowry",
    "w/whitede01": "Derrick White",
    "g/goberru01": "Rudy Gobert",
    "a/allenja01": "Jarrett Allen",
    "g/giddejo01": "Josh Giddey",
    "h/hendesc01": "Scoot Henderson",
    "j/johnsja01": "James Johnson",
    "m/markkla01": "Lauri Markkanen",
    "g/garlada01": "Darius Garland",
    "b/brunsja01": "Jalen Brunson",
    "b/banede01": "Desmond Bane",
    "c/cunnica01": "Cade Cunningham",
    
    "h/holmgch01": "Chet Holmgren",
    "r/reaveau01": "Austin Reaves",
    "s/sengual01": "Alperen Şengün",
    "w/washipj01": "P.J. Washington",
    "c/claxtni01": "Nic Claxton",
    "g/greenja05": "Jalen Green",
    "q/quickim01": "Immanuel Quickley",
    "b/bridgmi01": "Mikal Bridges",
    "b/barnesc01": "Scottie Barnes",
    "m/mobleev01": "Evan Mobley",
    "w/wembavi01": "Victor Wembanyama",
    "v/vucevni01": "Nikola Vučević",
    "t/turnemy01": "Myles Turner",
    "b/balllo01": "Lonzo Ball",
    
    "g/greendr01": "Draymond Green",
    "h/halibty01": "Tyrese Haliburton",
    "f/foxde01": "De'Aaron Fox",
    "p/portemi01": "Michael Porter Jr.",
    "a/allengr01": "Grayson Allen",
    "b/brownmo01": "Moses Brown",
    "h/herroty01": "Tyler Herro",
    "i/ingrabr01": "Brandon Ingram",
    "w/wagnemo01": "Moritz Wagner",
    "h/hieldbu01": "Buddy Hield",
    "r/robindu01": "Duncan Robinson",
    "n/nurkiju01": "Jusuf Nurkić",
    "v/vandeja01": "Jarred Vanderbilt",
    "c/capelca01": "Clint Capela",
    
    "w/wisemja01": "James Wiseman",
    "c/carusal01": "Alex Caruso",
    "h/hayesja02": "Jaxson Hayes",
    "c/chrisma02": "Max Christie",
    "d/daniedy01": "Dyson Daniels",
    "l/lavinza01": "Zach LaVine",
    "g/gordoaa01": "Aaron Gordon",
    "c/clingdo01": "Donovan Clingan",
    "h/hollaro01": "Ron Holland",
    "b/buzelma01": "Matthew Buzelis",
    "c/castlst01": "Stephon Castle",
    "w/wellsja01": "Jaylen Wells",
    "r/risacza01": "Zachariah Risacher",
    "e/edeyza01": "Zach Edey",
    
    "s/sarral01": "Alexandre Sarr",
    "w/wareke01": "Kel'el Ware",
    "f/filipky01": "Kyle Filipowski",
    "k/kolekty01": "Tyler Kolek",
    "f/furphjo01": "Johnny Furphy",
    "k/klintbo01": "Bobi Klintman",
    "m/mitchaj01": "Ajay Mitchell",
    "i/ighodos01": "Osasere Ighodaro",
    "b/bonaad01": "Adem Bona",
    "s/simpskj01": "K.J. Simpson",
    "l/larsspe01": "Pelle Larsson",
    "s/sheadja01": "Jamal Shead",
    "r/reevean01": "Antonio Reaves",
    "n/newtotr01": "Tristan Newton",
    
    "p/postqu01": "Quincy Post",
    "m/mcculke01": "Keshad McCullar",
    "c/chomcul01": "Cullen Chom",
    "c/coulibi01": "Bilal Coulibaly",
    "yabusgu01": "Guerschon Yabusele",
    "m/mccaija01": "Jared McCain",
    "m/merrisa01": "Seth Merritt",
    "j/jeromty01": "Ty Jerome",
    "h/huntede01": "De'Andre Hunter",
    "s/strusma01": "Max Strus",
    "m/mcbrimi01": "Miles McBride",
    "p/pritcpa01": "Payton Pritchard",
    "h/horfoal01": "Al Horford",
    
    "k/kornelu01": "Luke Kornet",
    "h/hausesa01": "Sam Hauser",
    "t/tillmxa01": "Xavier Tillman",
    "q/quetane01": "Neemias Queta",
    "s/scheiba01": "Baylor Scheierman",
    "d/davisjd01": "Johnny Davis",
    "p/peterdr01": "Drew Peterson",
    "w/walkelo01": "Lonnie Walker",
    "n/norrimi01": "Mike Norris",
    "c/craigto01": "Torrey Craig",
    "m/mathube01": "Bennedict Mathurin",
    "n/nembhan01": "Andrew Nembhard",
    "n/nesmiaa01": "Aaron Nesmith",
    
    "m/mccontj01": "T.J. McConnell",
    "j/jacksis01": "Isaiah Jackson",
    "s/sheppbe01": "Brandon Sheppard",
    "m/mcderdo01": "Doug McDermott",
    "t/tshieos01": "Saben Lee",
    "b/brownke03": "Kendrick Brown Jr.",
    "w/wongis01": "Isaiah Wong",
    "j/jacksqu01": "Quentin Jackson",
    "l/lopezbr01": "Brook Lopez",
    "p/portibo01": "Bobby Portis",
    "b/beaslma01": "Malik Beasley",
    "b/beaucma01": "MarJon Beauchamp",
    "g/greenaj01": "A.J Green",
    "c/crowdja01": "Jae Crowder",
    
    "c/connapa01": "Pat Connaughton",
    "r/rolliry01": "Ryan Rollins",
    "b/ballla01": "LaMelo Ball",
    "w/willima07": "Marcus Williams",
    "m/martico01": "Christopher Martin",
    "s/smithni01": "Nick Smith Jr.",
    "d/diabamo01": "Mohamed Diaby",
    "s/salauti01": "Elijah Salati",
    "m/micicva01": "Vasilije Micić",
    "j/jeffrda01": "David Jeffries",
    "r/richani01": "Nathan Richards",
    "m/manntr01": "Trey Mann",
    "o/okogijo01": "Olu Okogie",
    "b/baughda01": "Davion Baugh",
    "t/templga01": "Garett Temple",
    
    "p/poeltja01": "Jakob Poeltl",
    "b/barrerj01": "Juan Barrera",
    "d/dickgr01": "Greg Dickson",
    "m/moorewe01": "Wendell Moore Jr.",
    "f/flynnma01": "Malachi Flynn",
    "g/garrema01": "Marcus Garret",
    "p/paytoel01": "Elijah Payton",
    "g/gibsota01": "Gibson Sotorra",
    "r/rhodeja01": "Jacob Rhode",
    "b/batesem01": "Emoni Bates",
    "s/stevela01": "Leonard Steele",
    "c/clownno01": "Noah Clowney",
    "w/watfotr01": "Trevor Watson",

    'j/johnske04': 'Jaden Ivey',
    'v/vassede01': 'Devin Vassel',
    'm/mamuksa01': 'Mouhamed Mamukashvili',
    'w/weslebl01': 'Blake Wesley',
    's/sochaje01': 'Sochan Jeremy',
    'jonesde02': 'Derrick Jones',
    'p/powelno01': 'Norman Powell',
    'b/bogdabo01': 'Bogdan Bogdanović',
    'c/camarto01': 'Toumani Camara',
    'a/avdijde01': 'Deni Avdija',
    's/simonan01': 'Anfernee Simmons',
    'r/ruperra01': 'Rayan Ruppert',
    'w/williro04': 'Robert Williams',
    't/thybuma01': 'Matisse Thybulle',
    'b/blackan01': 'Anthony Black',

    'i/isaacjo01': 'Jonathan Isaac',
    'c/cartewe01': 'Wendell Carter Jr',
    'a/anthoco01': 'Cole Anthony',
    's/suggsja01': 'Jalen Suggs',
    'd/dinwisp01': 'Spencer Dinwiddie',
    't/thompkl01': 'Klay Thompson',
    'h/hardyja02': 'Jaden Hardy',
    'l/livelde01': 'Derreck Liviely',
    'g/gaffoda01': 'Daniel Gafford',
    'm/martica02': 'Caleb Martin',
    'p/poweldw01': 'Dwight Powell',
    'a/alexani01': 'Nickeil Alexander-Walker',
    'r/reidna01': 'Naz Reid',
    'c/conlemi01': 'Mike Conley',
    'd/divindo01': 'Donte Divincenzo',

    'd/dilliro01': 'Rob Dillingham',
    'b/brookdi01': 'Dillon Brooks',
    't/thompam01': 'Amen Thompson',
    'adamsst01': 'Steven Adams',
    's/smithja05': 'Jabari Smith',
    'w/whitmca01': 'Cameron Whitemore',
    'm/missiyv01': 'Yves Missi',
    'm/mccolcj01': 'Cj McCollum',
    'm/murphtr02': 'Trey Murphy',
    'b/brownbr01': 'Bruce Brown',
    'o/olynyke01': 'Kelly Olynik',
    'wellsja01': 'Jaylen Wells',
    'k/kennalu01': 'Luke Kennard',
    'c/clarkbr01': 'Brandon Clarke',
    'l/looneke01': 'Kevon Looney',
    'p/paytoga02': 'Gary Payton II',
    'k/kuminjo01': 'Jonathan Kuminga',
    'm/monkma01': 'Malik Monk',
    'v/valanjo01': 'Jonas Valanciunas',
    'l/laravja01': 'Jake LaRavia',
    'l/lylestr01': 'Trey Lyles',
    'm/murrake02': 'Keegan Murray',
    'g/georgke01': 'Keyonte George',
    's/sextoco01': 'Collin Sexton',
    'c/clarkjo01': 'Jordan Clarkson',
    'c/collijo01': 'John Collins',
    'm/mcdanja02': 'Jaden McDaniels',
    't/thomaca02': 'Cam Thomas',
    'j/johnsca02': 'Cameron Johnson',
    'w/willija07': 'Jaylin Williams',
    'h/harteis01': 'Isiah Harthenstein',
    'w/willija06': 'Jalen Williams',
    'j/joeis01': 'Isaiah Joe',
    'd/durenja01': 'Jalen Duren',
    't/thompau01': 'Ausar Thompson',
    'i/iveyja01': 'Jaden Ivey',
    's/stewais01': 'Isaiah Stewart',
    'h/harrito02': 'Tobias Harris',
    's/schrode01': 'Dennis Schröder',
    'b/braunch01': 'Christian Braun',
    'n/nnajize01': 'Zeke Nnaji',
    'w/watsope01': 'Peyton Watson',
    'j/jordade01': 'DeanDre Jordan',
    'w/whiteco01': 'Coby White',
    'w/willipa01': 'Patrick Williams',
    'd/dosunay01': 'Ayo Dosonmu',
    'j/jonestr01': 'Tre Jones',
    's/smithja04': 'Jalen Smith',
    's/smartma01': 'Marcus Smart',
    'p/poolejo01': 'Jordan Poole',
    'k/kispeco01': 'Corey Kispert',
    'c/champju01': 'Julian Champagnie',
    'r/reedpa01': 'Paul Reed',
    'b/batumni01': 'Nicolas Batum',
    'j/jaqueja01': 'Jaime Jaquez Jr.',
    'm/mitchda01': 'Davion Mitchell',
    'l/loveke01': 'Kevin Love',
    'a/anderky01': 'Kyle Anderson',
    's/sensabr01': 'Brice Sensabaugh',
    'j/juzanjo01': 'Johnny Juzang',

    's/shamela01': 'Laundry Shamet',
    'd/dunnkr01': 'Kris Dunn',
    'c/coffeam01': 'Amir Coffey',
    'o/okongon01': 'Onyeka Okongwu',
    'l/leverca01': 'Carris Levert',
    'm/mannte01': 'Terrance Mann',
    'j/johnske07': 'Keon Johnson',
    'w/willizi02': 'Ziaire Williams',
    'b/bitadgo01': 'Goga Bitadze',
    's/simmobe01': 'Ben Simmons',
    'h/holidaa01': 'Aaron Holiday',
    'e/easonta01': 'Tari Eason',
    's/strawju01': 'Julian Strawther',
    'w/wiggiaa01': 'Wiggins Aaron',
    'd/dortlu01': 'Luguentz Dort',
    'w/willike04': 'Kenrich Williams',

    'o/onealro01': 'Roice O’Neal',
    's/sharpda01': 'David Sharp',
    'c/curryse01': 'Seth Curry'
}


In [5]:
df=pd.read_csv("nba_game_logs_2025.csv")
df['player_name'] = df['player_id'].map(player_name_map)
df = df[~(df['Date'].isna() | (df['Team'] == 'Unknown'))]

# Display the updated DataFrame
df.to_csv("nba_game_logs_2025.csv")
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Rk,Gcar,Gtm,Date,Team,Opp,Result,GS,...,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-,player_id,player_name
0,0,0,1.0,1493.0,1.0,2024-10-22,LAL,MIN,"W, 110-103",*,...,4.0,0.0,2.0,2.0,3.0,16.0,10.1,-6.0,j/jamesle01,LeBron James
1,1,1,2.0,1494.0,2.0,2024-10-25,LAL,PHO,"W, 123-116",*,...,8.0,0.0,0.0,2.0,1.0,21.0,17.9,14.0,j/jamesle01,LeBron James
2,2,2,3.0,1495.0,3.0,2024-10-26,LAL,SAC,"W, 131-127",*,...,10.0,0.0,1.0,5.0,3.0,32.0,27.1,13.0,j/jamesle01,LeBron James
3,3,3,4.0,1496.0,4.0,2024-10-28,LAL,PHO,"L, 105-109",*,...,8.0,1.0,0.0,2.0,1.0,11.0,6.9,-17.0,j/jamesle01,LeBron James
4,4,4,5.0,1497.0,5.0,2024-10-30,LAL,CLE,"L, 110-134",*,...,3.0,0.0,0.0,6.0,2.0,26.0,18.0,-17.0,j/jamesle01,LeBron James
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23117,23406,23406,66.0,548.0,78.0,2025-04-06,CHO,CHI,"L, 117-131",1,...,2.0,0.0,0.0,0.0,2.0,10.0,9.0,-11.0,c/curryse01,Seth Curry
23118,23407,23407,67.0,549.0,79.0,2025-04-08,CHO,MEM,"L, 100-124",0,...,1.0,0.0,0.0,1.0,0.0,9.0,5.3,-10.0,c/curryse01,Seth Curry
23119,23408,23408,67.0,,80.0,2025-04-09,CHO,TOR,"L, 96-126",Did Not Play,...,,,,,,,,,c/curryse01,Seth Curry
23120,23409,23409,68.0,550.0,81.0,2025-04-11,CHO,BOS,"L, 94-130",0,...,0.0,1.0,0.0,1.0,1.0,17.0,14.0,-7.0,c/curryse01,Seth Curry
