In [None]:
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import (
    playercareerstats, playergamelog, teamgamelog,
    teamplayerdashboard, teamyearbyyearstats,
    leaguegamefinder, playerawards
)
import pandas as pd

In [None]:
from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import (
    commonplayerinfo,
    playercareerstats,
    teamdetails,
    leaguegamefinder,
    playergamelog,
    commonteamroster,
)
import pandas as pd
from datetime import datetime

# Load the list of active players from nba_api's static players module.
# Later cells iterate this list and read each entry's "id" and "full_name" keys.
active_players = players.get_active_players()


In [None]:
# Scrape bio, draft, team-history and award records for every active player.
# Each endpoint call below passes an explicit 60s timeout (the original note in
# this cell gives the library default as 30s).
import os
os.environ["NBA_API_DEBUG"] = "False"
import time
from requests.exceptions import ReadTimeout

API_TIMEOUT_S = 60      # per-request timeout handed to every endpoint
CALL_PAUSE_S = 2.5      # pause after each endpoint call
PLAYER_PAUSE_S = 1.5    # extra pause between players
TIMEOUT_PAUSE_S = 5     # cool-down after a read timeout

# Award descriptions to keep, mapped to the short label stored in "Award_Type".
AWARD_TYPES = {"NBA Most Valuable Player": "MVP", "NBA All-Star": "All-Star"}

player_info_list = []
draft_info_list = []
team_history_list = []
awards_info_list = []
# One record per player whose fetch raised, so they can be re-run later
# instead of being copied by hand from the printed log.
failed_players = []

# no limit get all players
for i, player in enumerate(active_players):
    pid = player["id"]
    name = player["full_name"]
    print(f"{i+1}. Fetching data for {name}...")

    try:
        # --- BIO ---
        info = commonplayerinfo.CommonPlayerInfo(player_id=pid, timeout=API_TIMEOUT_S).get_data_frames()[0]

        player_info_list.append({
            "Player_ID": pid,
            "FullName": info.at[0, "DISPLAY_FIRST_LAST"],
            "FirstName": info.at[0, "FIRST_NAME"],
            "LastName": info.at[0, "LAST_NAME"],
            "Position": info.at[0, "POSITION"],
            "DateOfBirth": info.at[0, "BIRTHDATE"],
            "Height": info.at[0, "HEIGHT"],
            "Weight": info.at[0, "WEIGHT"],
            "SeasonExperience": info.at[0, "SEASON_EXP"]
        })
        print(f"Player info retrieved for {name}")

        # --- DRAFT ---
        # Keep a direct reference to this player's draft record so the draft
        # team can be filled in below without rescanning draft_info_list.
        draft_info = None
        draft_year = info.at[0, "DRAFT_YEAR"]
        if pd.notna(draft_year) and str(draft_year).lower() != "undrafted":
            draft_info = {
                "Draft_ID": f"{pid}-{draft_year}",
                "Player_ID": pid,
                'Player Name': name,
                "DraftYear": draft_year,
                "DraftRound": info.at[0, "DRAFT_ROUND"],
                "DraftPick": info.at[0, "DRAFT_NUMBER"],
                # Filled from the first career stint below (an approximation:
                # the first team played for stands in for the drafting team).
                "DraftTeam": None,
                "DraftTeamID": None
            }
            draft_info_list.append(draft_info)
        else:
            print(f"Skipping draft info (undrafted player)")

        time.sleep(CALL_PAUSE_S)

        # --- CAREER TEAM HISTORY ---
        career = playercareerstats.PlayerCareerStats(player_id=pid, timeout=API_TIMEOUT_S).get_data_frames()[0]
        time.sleep(CALL_PAUSE_S)

        # Seasons split between teams also carry an aggregate "TOT" row
        # (TEAM_ID 0); it is not a real team and would create phantom stints.
        career = career[(career["TEAM_ID"] != 0) & (career["TEAM_ABBREVIATION"] != "TOT")]

        if career.empty:
            # No early `continue`: the awards lookup below must still run.
            print(f"No team history found for {name}")
        else:
            # A stable sort keeps the within-season row order of teams intact.
            career_sorted = career.sort_values("SEASON_ID", kind="mergesort").reset_index(drop=True)
            team_changed = career_sorted["TEAM_ID"] != career_sorted["TEAM_ID"].shift(1)
            career_sorted["Stint_ID"] = team_changed.astype(int).cumsum()

            # Group by continuous stints with the same team; groupby sorts on
            # Stint_ID first, so rows come out in chronological order.
            stints = (
                career_sorted.groupby(["Stint_ID", "TEAM_ID", "TEAM_ABBREVIATION"], as_index=False)
                .agg(StartSeason=("SEASON_ID", "min"), EndSeason=("SEASON_ID", "max"))
            )

            stints["Player_ID"] = pid
            stints["Player_Name"] = name
            # Only the most recent stint can be current; an earlier stint with
            # the same franchise (player left and came back) must not be flagged.
            is_last_stint = stints["Stint_ID"] == stints["Stint_ID"].max()
            stints["IsCurrent"] = is_last_stint & (stints["TEAM_ID"] == info.at[0, "TEAM_ID"])

            team_history_list.append(stints)
            print(f"Team history retrieved for {name}")

            # --- INFER DRAFT TEAM FROM FIRST STINT ---
            if draft_info is not None:
                first_stint = stints.iloc[0]
                draft_info["DraftTeam"] = first_stint["TEAM_ABBREVIATION"]
                draft_info["DraftTeamID"] = first_stint["TEAM_ID"]

        # --- AWARDS ---
        awards_df = playerawards.PlayerAwards(player_id=pid, timeout=API_TIMEOUT_S).get_data_frames()[0]
        print(f"Awards for {name}:")

        if awards_df.empty:
            print("No awards found.")
        else:
            # Filter for exact award names
            filtered_awards = awards_df[awards_df["DESCRIPTION"].isin(list(AWARD_TYPES))]

            if filtered_awards.empty:
                print("      No MVP or All-Star awards found.")
            else:
                for _, row in filtered_awards.iterrows():
                    awards_info_list.append({
                        "Player_ID": pid,
                        "Player_Name": name,
                        "Season": row["SEASON"],
                        "Award": row["DESCRIPTION"],
                        "Award_Type": AWARD_TYPES[row["DESCRIPTION"]],
                    })

                print(f"{len(filtered_awards)} qualifying awards added.")
        time.sleep(CALL_PAUSE_S)

    except ReadTimeout:
        print(f"Timeout for {name}, retrying later...")
        failed_players.append({"Player_ID": pid, "Player_Name": name, "Reason": "timeout"})
        time.sleep(TIMEOUT_PAUSE_S)
        continue
    except Exception as e:
        print(f"Error fetching {name}: {e}")
        failed_players.append({"Player_ID": pid, "Player_Name": name, "Reason": str(e)})
        continue

    print("-" * 60)
    time.sleep(PLAYER_PAUSE_S)

print(f"{len(failed_players)} player(s) failed; see failed_players.")




# Turn the per-player records collected by the scrape loop into one DataFrame
# per table.
player_info_df = pd.DataFrame(player_info_list)

draft_info_df = pd.DataFrame(draft_info_list)

team_history_columns = [
    "Player_ID", "Player_Name", "TEAM_ID", "TEAM_ABBREVIATION", "StartSeason", "EndSeason", "IsCurrent"
]
if team_history_list:
    team_history_df = pd.concat(team_history_list, ignore_index=True)[team_history_columns]
else:
    # pd.concat raises ValueError on an empty list (e.g. every request failed),
    # so fall back to an empty frame that still has the expected columns.
    team_history_df = pd.DataFrame(columns=team_history_columns)
team_history_df = team_history_df.rename(columns={"TEAM_ABBREVIATION": "TeamAbbr"})

awards_info_df = pd.DataFrame(awards_info_list)


In [None]:
# Display the player bio table built from player_info_list.
player_info_df

In [None]:
# Display the draft table built from draft_info_list.
draft_info_df

In [None]:
# Display the team-stint table (one row per player per continuous team stint).
team_history_df

In [None]:
# Display the awards table (MVP and All-Star rows only).
awards_info_df

In [None]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguestandingsv3
import pandas as pd

# Static team metadata, enriched with each team's conference and division as
# reported by the 2024-25 league standings endpoint.
teams_df = pd.DataFrame(teams.get_teams())

standings_df = leaguestandingsv3.LeagueStandingsV3(season="2024-25").get_data_frames()[0]

# Keep only the join key and the two columns being added; rename the key so it
# matches the "id" column of teams_df.
standings_info = standings_df.loc[:, ["TeamID", "Conference", "Division"]].rename(
    columns={"TeamID": "id"}
)

# Left join so every team from the static list is kept even without a
# standings row, then narrow to the columns of interest.
merged_df = teams_df.merge(standings_info, how="left", on="id")[
    ["id", "full_name", "abbreviation", "nickname", "city", "state", "Conference", "Division"]
]

print("NBA Teams with Conference & Division Info:")

merged_df


In [None]:
# Re-fetch the players whose requests timed out earlier and fold the fresh rows
# into the existing tables (player_info_df, draft_info_df, team_history_df,
# awards_info_df) instead of overwriting those tables with retry-only data.

# List of players that previously timed out
retry_players = [
    "Nikola Vučević", "Dean Wade", "Franz Wagner", "Moritz Wagner", "Jabari Walker",
    "Jarace Walker", "Lonnie Walker IV", "Cason Wallace", "Keaton Wallace", "Jordan Walsh",
    "Ja'Kobe Walter", "Kel'el Ware", "P.J. Washington", "TyTy Washington Jr.", "Landry Shamet",
    "Terrence Shannon Jr.", "Day'Ron Sharpe", "Shaedon Sharpe", "Jamal Shead", "Ben Sheppard",
    "Reed Sheppard", "Pascal Siakam", "Ben Simmons", "Anfernee Simons", "KJ Simpson",
    "Jaylen Sims", "Jericho Sims", "Marcus Smart", "Dru Smith", "Jalen Smith", "Tolu Smith",
    "Tyler Smith", "Jabari Smith Jr.", "Nick Smith Jr.", "Jeremy Sochan", "Cam Spencer",
    "Pat Spencer", "Jaden Springer", "Isaiah Stevens", "Lamar Stevens", "Richaun Holmes",
    "Chet Holmgren", "Jalen Hood-Schifino", "Al Horford", "Talen Horton-Tucker", "Caleb Houstan",
    "Jett Howard", "Kevin Huerter", "Jay Huff", "Ariel Hukporti", "De'Andre Hunter",
    "Bones Hyland", "Oso Ighodaro", "Joe Ingles", "Brandon Ingram", "Harrison Ingram",
    "Kyrie Irving", "Jonathan Isaac", "Jaden Ivey", "GG Jackson", "Isaiah Jackson",
    "Quenton Jackson", "Reggie Jackson", "Andre Jackson Jr.", "Jaren Jackson Jr.",
    "Trayce Jackson-Davis", "Bronny James", "LeBron James", "Jaime Jaquez Jr.", "DaQuan Jeffries",
    "Trey Jemison III", "Daniss Jenkins", "Ty Jerome", "Isaiah Joe", "AJ Johnson",
    "Cameron Johnson", "Jalen Johnson", "James Johnson", "Keldon Johnson"
]

# Filter only the retry players (case-insensitive); a set gives O(1) lookups.
retry_players_lower = {p.lower() for p in retry_players}
filtered_players = [
    p for p in active_players if p["full_name"].lower() in retry_players_lower
]

print(f"Found {len(filtered_players)} players to retry.")

# Surface names that matched no active player instead of dropping them silently.
unmatched_names = retry_players_lower - {p["full_name"].lower() for p in filtered_players}
if unmatched_names:
    print(f"No active player matched: {sorted(unmatched_names)}")

RETRY_API_TIMEOUT_S = 60     # per-request timeout handed to every endpoint
RETRY_CALL_PAUSE_S = 2.5     # pause after each endpoint call
RETRY_PLAYER_PAUSE_S = 1.5   # extra pause between players
RETRY_TIMEOUT_PAUSE_S = 5    # cool-down after a read timeout

# Award descriptions to keep, mapped to the short label stored in "Award_Type".
RETRY_AWARD_TYPES = {"NBA Most Valuable Player": "MVP", "NBA All-Star": "All-Star"}

# Retry results are collected under their own names so records gathered by an
# earlier run are not wiped.
retry_player_info_list = []
retry_draft_info_list = []
retry_team_history_list = []
retry_awards_info_list = []
retry_failed_players = []

for i, player in enumerate(filtered_players):
    pid = player["id"]
    name = player["full_name"]
    print(f"{i+1}. Fetching data for {name}...")

    try:
        # --- BIO ---
        info = commonplayerinfo.CommonPlayerInfo(player_id=pid, timeout=RETRY_API_TIMEOUT_S).get_data_frames()[0]

        retry_player_info_list.append({
            "Player_ID": pid,
            "FullName": info.at[0, "DISPLAY_FIRST_LAST"],
            "FirstName": info.at[0, "FIRST_NAME"],
            "LastName": info.at[0, "LAST_NAME"],
            "Position": info.at[0, "POSITION"],
            "DateOfBirth": info.at[0, "BIRTHDATE"],
            "Height": info.at[0, "HEIGHT"],
            "Weight": info.at[0, "WEIGHT"],
            "SeasonExperience": info.at[0, "SEASON_EXP"]
        })
        print(f"Player info retrieved for {name}")

        # --- DRAFT ---
        # Keep a direct reference to this player's draft record so the draft
        # team can be filled in below without rescanning the whole list.
        draft_info = None
        draft_year = info.at[0, "DRAFT_YEAR"]
        if pd.notna(draft_year) and str(draft_year).lower() != "undrafted":
            draft_info = {
                "Draft_ID": f"{pid}-{draft_year}",
                "Player_ID": pid,
                'Player Name': name,
                "DraftYear": draft_year,
                "DraftRound": info.at[0, "DRAFT_ROUND"],
                "DraftPick": info.at[0, "DRAFT_NUMBER"],
                # Filled from the first career stint below (an approximation:
                # the first team played for stands in for the drafting team).
                "DraftTeam": None,
                "DraftTeamID": None
            }
            retry_draft_info_list.append(draft_info)
        else:
            print(f"Skipping draft info (undrafted player)")

        time.sleep(RETRY_CALL_PAUSE_S)

        # --- CAREER TEAM HISTORY ---
        career = playercareerstats.PlayerCareerStats(player_id=pid, timeout=RETRY_API_TIMEOUT_S).get_data_frames()[0]
        time.sleep(RETRY_CALL_PAUSE_S)

        # Seasons split between teams also carry an aggregate "TOT" row
        # (TEAM_ID 0); it is not a real team and would create phantom stints.
        career = career[(career["TEAM_ID"] != 0) & (career["TEAM_ABBREVIATION"] != "TOT")]

        if career.empty:
            # No early `continue`: the awards lookup below must still run.
            print(f"No team history found for {name}")
        else:
            # A stable sort keeps the within-season row order of teams intact.
            career_sorted = career.sort_values("SEASON_ID", kind="mergesort").reset_index(drop=True)
            team_changed = career_sorted["TEAM_ID"] != career_sorted["TEAM_ID"].shift(1)
            career_sorted["Stint_ID"] = team_changed.astype(int).cumsum()

            # Group by continuous stints with the same team; groupby sorts on
            # Stint_ID first, so rows come out in chronological order.
            stints = (
                career_sorted.groupby(["Stint_ID", "TEAM_ID", "TEAM_ABBREVIATION"], as_index=False)
                .agg(StartSeason=("SEASON_ID", "min"), EndSeason=("SEASON_ID", "max"))
            )

            stints["Player_ID"] = pid
            stints["Player_Name"] = name
            # Only the most recent stint can be current; an earlier stint with
            # the same franchise (player left and came back) must not be flagged.
            is_last_stint = stints["Stint_ID"] == stints["Stint_ID"].max()
            stints["IsCurrent"] = is_last_stint & (stints["TEAM_ID"] == info.at[0, "TEAM_ID"])

            retry_team_history_list.append(stints)
            print(f"Team history retrieved for {name}")

            # --- INFER DRAFT TEAM FROM FIRST STINT ---
            if draft_info is not None:
                first_stint = stints.iloc[0]
                draft_info["DraftTeam"] = first_stint["TEAM_ABBREVIATION"]
                draft_info["DraftTeamID"] = first_stint["TEAM_ID"]

        # --- AWARDS ---
        awards_df = playerawards.PlayerAwards(player_id=pid, timeout=RETRY_API_TIMEOUT_S).get_data_frames()[0]
        print(f"Awards for {name}:")

        if awards_df.empty:
            print("No awards found.")
        else:
            # Filter for exact award names
            filtered_awards = awards_df[awards_df["DESCRIPTION"].isin(list(RETRY_AWARD_TYPES))]

            if filtered_awards.empty:
                print("No MVP or All-Star awards found.")
            else:
                for _, row in filtered_awards.iterrows():
                    retry_awards_info_list.append({
                        "Player_ID": pid,
                        "Player_Name": name,
                        "Season": row["SEASON"],
                        "Award": row["DESCRIPTION"],
                        "Award_Type": RETRY_AWARD_TYPES[row["DESCRIPTION"]],
                    })

                print(f"{len(filtered_awards)} qualifying awards added.")
        time.sleep(RETRY_CALL_PAUSE_S)

    except ReadTimeout:
        print(f"Timeout for {name}, retrying later...")
        retry_failed_players.append({"Player_ID": pid, "Player_Name": name, "Reason": "timeout"})
        time.sleep(RETRY_TIMEOUT_PAUSE_S)
        continue
    except Exception as e:
        print(f"Error fetching {name}: {e}")
        retry_failed_players.append({"Player_ID": pid, "Player_Name": name, "Reason": str(e)})
        continue

    print("-" * 60)
    time.sleep(RETRY_PLAYER_PAUSE_S)


# --- CELL 3: COMBINE INTO DATAFRAMES ---

def _merge_retry(previous, fresh, key="Player_ID"):
    """Return `previous` with the rows of re-fetched players replaced by `fresh`.

    `previous` may be None (no earlier table exists) or empty, in which case
    `fresh` is returned. Rows of `previous` whose `key` value also appears in
    `fresh` are dropped before appending, so partial rows left by a failed
    earlier attempt are not duplicated and re-running this cell is idempotent.
    """
    if previous is None or previous.empty or key not in previous.columns:
        return fresh.reset_index(drop=True)
    if fresh.empty or key not in fresh.columns:
        return previous
    untouched = previous[~previous[key].isin(fresh[key])]
    return pd.concat([untouched, fresh], ignore_index=True)


retry_team_history_columns = [
    "Player_ID", "Player_Name", "TEAM_ID", "TEAM_ABBREVIATION", "StartSeason", "EndSeason", "IsCurrent"
]
if retry_team_history_list:
    retry_team_history_df = pd.concat(retry_team_history_list, ignore_index=True)[retry_team_history_columns]
else:
    # pd.concat raises ValueError on an empty list (e.g. every retry failed).
    retry_team_history_df = pd.DataFrame(columns=retry_team_history_columns)
retry_team_history_df = retry_team_history_df.rename(columns={"TEAM_ABBREVIATION": "TeamAbbr"})

# globals().get(...) lets this cell also run when no earlier table exists yet;
# in that case the tables simply hold the retry results.
player_info_df = _merge_retry(globals().get("player_info_df"), pd.DataFrame(retry_player_info_list))

# Draft info as separate DataFrame
draft_info_df = _merge_retry(globals().get("draft_info_df"), pd.DataFrame(retry_draft_info_list))

team_history_df = _merge_retry(globals().get("team_history_df"), retry_team_history_df)

awards_info_df = _merge_retry(globals().get("awards_info_df"), pd.DataFrame(retry_awards_info_list))

In [None]:
# Display the team-stint table as left by the retry cell above.
team_history_df

In [None]:
import time
import pandas as pd
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog

# Seasons are identified by their starting year. The driver loop iterates
# range(START_SEASON, END_SEASON), so END_SEASON itself is excluded.
START_SEASON = 2001
END_SEASON = 2025
MAX_RETRIES = 5        # attempts safe_api_call makes before giving up
BASE_SLEEP = 1.5       # base delay between seasons; also the first retry delay
BACKOFF_FACTOR = 2     # exponential backoff multiplier

def safe_api_call(callable_func, *args, **kwargs):
    """
    Invoke ``callable_func(*args, **kwargs)`` with retries and exponential backoff.

    Any exception raised by the call (not only rate-limit errors) is printed and
    triggers a retry, up to MAX_RETRIES attempts in total. The wait before retry
    number k is ``BASE_SLEEP * BACKOFF_FACTOR ** (k - 1)`` seconds.

    Returns the callable's result, or None once every attempt has failed.
    """
    attempt = 0
    while attempt < MAX_RETRIES:
        attempt += 1
        try:
            outcome = callable_func(*args, **kwargs)
        except Exception as err:
            print(f"Attempt {attempt}/{MAX_RETRIES} failed: {err}")

            # Out of attempts: report and give up without sleeping again.
            if attempt >= MAX_RETRIES:
                print("Max retries reached — skipping.")
                return None

            delay = BASE_SLEEP * (BACKOFF_FACTOR ** (attempt - 1))
            print(f"Waiting {delay:.1f}s before retrying...")
            time.sleep(delay)
        else:
            return outcome
    return None

def fetch_season_avg(player_id, name, season):
    """
    Fetch one player's game log for one season and reduce it to a single row.

    Parameters
    ----------
    player_id : player identifier passed to the game-log endpoint.
    name      : player name, copied into the result as "Player_Name".
    season    : starting year of the season (2023 is requested as "2023-24").

    Returns
    -------
    A pandas Series holding the per-game mean of every numeric game-log column,
    plus "Player_ID", "Player_Name" and "Season"; or None when the request
    failed, the season has no games, or the data could not be processed.

    Notes
    -----
    * "Player_ID" is stored exactly as passed in. The averages are converted to
      an object Series first, because writing the id into a float Series turned
      it into a float such as 1630173.0.
    * FG_PCT / FG3_PCT / FT_PCT are recomputed as total makes over total
      attempts. Averaging the per-game percentages would weight a 1-for-1 game
      the same as a 10-for-20 game and does not give the season percentage.
    """
    season_str = f"{season}-{str(season + 1)[-2:]}"
    print(f"   → Fetching {season_str}")

    gl = safe_api_call(
        playergamelog.PlayerGameLog,
        player_id=player_id,
        season=season_str
    )

    if gl is None:
        return None

    try:
        df = gl.get_data_frames()[0]

        if df.empty:
            print("No games found.")
            return None

        # object dtype so ints and strings assigned below keep their own type
        season_avg = df.mean(numeric_only=True).astype(object)

        shooting_splits = (
            ("FGM", "FGA", "FG_PCT"),
            ("FG3M", "FG3A", "FG3_PCT"),
            ("FTM", "FTA", "FT_PCT"),
        )
        for made_col, attempt_col, pct_col in shooting_splits:
            if {made_col, attempt_col, pct_col} <= set(df.columns):
                attempts = df[attempt_col].sum()
                # With zero attempts keep 0.0, which is also what the mean of
                # all-zero per-game percentages would have produced.
                season_avg[pct_col] = df[made_col].sum() / attempts if attempts else 0.0

        season_avg["Player_ID"] = player_id
        season_avg["Player_Name"] = name
        season_avg["Season"] = season_str

        print("Success")
        return season_avg

    except Exception as e:
        print(f"Data error: {e}")
        return None

# Build one averaged row per (active player, season) and stack them into
# season_averages_df. Seasons that return nothing are skipped.
season_averages_list = []

print(f"\nTotal active players: {len(active_players)}\n")

for player in active_players:
    pid, name = player["id"], player["full_name"]
    print(f"\nFetching season averages for {name}")

    for season in range(START_SEASON, END_SEASON):
        season_row = fetch_season_avg(pid, name, season)
        if season_row is None:
            pass  # failed request or no games that season
        else:
            season_averages_list.append(season_row)

        # Pause after every season request, successful or not.
        time.sleep(BASE_SLEEP)

season_averages_df = pd.DataFrame(season_averages_list)

print("\nDone!")
print("Total rows extracted:", len(season_averages_df))


In [None]:
# Display the per-player, per-season averages table.
season_averages_df

Unnamed: 0,Player_ID,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,...,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Player_Name,Season
0,1630173.0,12.081967,2.032787,3.737705,0.514000,0.000000,0.016393,0.000000,0.918033,1.803279,...,0.475410,0.327869,0.459016,0.704918,1.491803,4.983607,-1.901639,1.000000,Precious Achiuwa,2020-21
1,1630173.0,23.684932,3.630137,8.260274,0.431370,0.767123,2.136986,0.255260,1.068493,1.794521,...,1.123288,0.506849,0.561644,1.150685,2.068493,9.095890,0.794521,1.000000,Precious Achiuwa,2021-22
2,1630173.0,20.781818,3.563636,7.345455,0.459564,0.527273,1.963636,0.194818,1.581818,2.254545,...,0.909091,0.563636,0.545455,1.072727,1.854545,9.236364,-0.690909,1.000000,Precious Achiuwa,2022-23
3,1630173.0,21.945946,3.175676,6.337838,0.485757,0.351351,1.310811,0.180162,0.932432,1.513514,...,1.310811,0.621622,0.918919,1.121622,1.932432,7.635135,-0.662162,1.000000,Precious Achiuwa,2023-24
4,1630173.0,20.491228,2.877193,5.736842,0.498842,0.175439,0.631579,0.109632,0.719298,1.210526,...,0.964912,0.824561,0.736842,0.789474,1.421053,6.649123,-0.842105,1.000000,Precious Achiuwa,2024-25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2920,203967.0,24.742424,3.878788,8.151515,0.446697,1.272727,3.560606,0.327636,1.636364,1.939394,...,1.863636,0.560606,0.242424,1.333333,2.363636,10.666667,-1.030303,1.015152,Dario Šarić,2019-20
2921,203967.0,17.360000,3.060000,6.840000,0.419740,0.940000,2.700000,0.276660,1.680000,1.980000,...,1.300000,0.600000,0.080000,1.140000,1.900000,8.740000,4.380000,1.000000,Dario Šarić,2020-21
2922,203967.0,14.157895,2.228070,4.859649,0.382246,0.789474,2.017544,0.280772,1.105263,1.333333,...,1.298246,0.350877,0.122807,0.964912,1.736842,6.350877,0.631579,1.000000,Dario Šarić,2022-23
2923,203967.0,17.140625,2.828125,6.062500,0.408500,1.156250,3.078125,0.303047,1.234375,1.453125,...,2.250000,0.484375,0.156250,1.218750,1.750000,8.046875,-0.312500,1.000000,Dario Šarić,2023-24
