Team information

In [None]:
from nba_api.stats.static import teams
import pandas as pd

# Destination file for the static team table.
OUTPUT_CSV = "team_information.csv"

# One row per record returned by nba_api's static team list.
team_records = teams.get_teams()
df = pd.DataFrame(team_records)

# Persist without the positional index, then preview the first rows.
df.to_csv(OUTPUT_CSV, index=False)
print(df.head())

In [None]:
from nba_api.stats.endpoints import leaguedashteamstats
import pandas as pd

def get_team_shooting_general(season='2024-25', season_type='Regular Season', per_mode='PerGame'):
    """Build a per-team shooting table with overall, 2-point and 3-point splits.

    Requests the 'Base' measure from LeagueDashTeamStats for the given season,
    season type and per-mode. The 2-point makes/attempts are derived by
    subtracting the 3-point columns from the overall field-goal columns, and
    every percentage/frequency is multiplied by 100 and rounded to one decimal.

    Returns:
        A new DataFrame sorted alphabetically by team name with a fresh
        0..n-1 index.
    """
    raw = leaguedashteamstats.LeagueDashTeamStats(
        measure_type_detailed_defense='Base',
        per_mode_detailed=per_mode,
        season=season,
        season_type_all_star=season_type,
    ).get_data_frames()[0]

    def as_percent(ratio):
        # Scale a ratio to percentage points with one decimal.
        return (ratio * 100).round(1)

    made, tried = raw['FGM'], raw['FGA']
    made3, tried3 = raw['FG3M'], raw['FG3A']

    # Everything that is not a 3-pointer is a 2-pointer (FG - 3P = 2P).
    made2 = made - made3
    tried2 = tried - tried3

    table = pd.DataFrame({
        'TEAM': raw['TEAM_NAME'],
        'GP': raw['GP'],
        'G': raw['GP'],  # same values as GP
        'FREQ%': 100.0,  # overall field goals are the whole shot diet
        'FGM': made,
        'FGA': tried,
        'FG%': as_percent(raw['FG_PCT']),
        # Effective FG% = (FGM + 0.5 * 3PM) / FGA
        'EFG%': as_percent((made + 0.5 * made3) / tried),
        '2FG FREQ%': as_percent(tried2 / tried),  # share of FGA that are 2PT
        '2FGM': made2,
        '2FGA': tried2,
        '2FG%': as_percent(made2 / tried2),
        '3FG FREQ%': as_percent(tried3 / tried),  # share of FGA that are 3PT
        '3PM': made3,
        '3PA': tried3,
        '3P%': as_percent(raw['FG3_PCT']),
    })

    # Alphabetical by team, renumbered from zero.
    return table.sort_values('TEAM').reset_index(drop=True)


# Demo: fetch the 2024-25 per-game table and print a few views of it.
if __name__ == "__main__":
    df = get_team_shooting_general(season='2024-25', per_mode='PerGame')

    banner = "NBA Teams - Shooting Dashboard: General (2024-25 Season)"
    print(banner)
    print("=" * 150)

    # Lift pandas' display limits so the full table prints without truncation.
    display_settings = {
        'display.max_columns': None,
        'display.width': 200,
        'display.max_rows': None,
    }
    for option_name, option_value in display_settings.items():
        pd.set_option(option_name, option_value)

    print(df.to_string(index=False))

    # To keep a copy on disk: df.to_csv('nba_team_shooting_general.csv', index=False)

    # Two small leaderboards: (heading, ranking column, columns to show).
    leaderboards = (
        ("\n\nTop 5 teams by FG%:", 'FG%', ['TEAM', 'FGM', 'FGA', 'FG%', 'EFG%']),
        ("\n\nTop 5 teams by 3P%:", '3P%', ['TEAM', '3PM', '3PA', '3P%', '3FG FREQ%']),
    )
    for heading, rank_by, shown in leaderboards:
        print(heading)
        print(df.nlargest(5, rank_by)[shown])

Get shot zone data per player per season

In [None]:
# pip install nba_api pandas
import pandas as pd
from time import sleep
from nba_api.stats.endpoints import leaguedashteamstats
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ---------- helpers ----------
def season_strings(first, last):
    """Return season labels ("YYYY-YY") for each start year from first to last, inclusive."""
    labels = []
    for start_year in range(first, last + 1):
        # Two-digit suffix of the following calendar year, e.g. 1999 -> "00".
        end_suffix = (start_year + 1) % 100
        labels.append(f"{start_year}-{end_suffix:02d}")
    return labels

def norm_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Cast every column label of df to str, in place, and return the same object.

    Anything without a ``columns`` attribute is handed back untouched.
    """
    if not hasattr(df, "columns"):
        return df
    df.columns = list(map(str, df.columns))
    return df

def select_existing(df: pd.DataFrame, cols):
    """Return df restricted to the requested columns that it actually has, in the requested order."""
    available = df.columns
    present = [name for name in cols if name in available]
    return df[present]

# Seasons "2013-14" through "2024-25" (start years 2013..2024). Built directly
# instead of generating one extra season and filtering it out by string compare.
SEASONS = season_strings(2013, 2024)
SLEEP = 0.35  # gentle rate limit


def _fetch_team_dash(season, measure):
    """Fetch one regular-season, per-game LeagueDashTeamStats table.

    Args:
        season: Season label such as "2013-14".
        measure: Value passed as measure_type_detailed_defense
            ("Advanced" or "Base" in this script).

    Returns:
        The first result frame with stringified column names and a SEASON
        column added; all columns returned by the API are kept.
    """
    frame = leaguedashteamstats.LeagueDashTeamStats(
        season=season,
        season_type_all_star="Regular Season",
        per_mode_detailed="PerGame",
        measure_type_detailed_defense=measure,
        league_id_nullable="00",
    ).get_data_frames()[0]
    frame = norm_cols(frame)
    frame["SEASON"] = season
    return frame


# ---------- 1) League/Team pace & efficiencies ----------
team_adv_frames, team_base_frames = [], []

for s in SEASONS:
    # Advanced (Pace, OffRtg, etc.)
    adv = _fetch_team_dash(s, "Advanced")
    team_adv_frames.append(adv)
    sleep(SLEEP)

    # Base (counts/rates useful for joins)
    base = _fetch_team_dash(s, "Base")
    team_base_frames.append(base)
    print(s)
    sleep(SLEEP)

# Stack the per-season frames into one table per measure and save them.
team_advanced = pd.concat(team_adv_frames, ignore_index=True)
team_base = pd.concat(team_base_frames, ignore_index=True)

team_advanced.to_csv("team_advanced_by_season.csv", index=False)
team_base.to_csv("team_base_by_season.csv", index=False)

print(f"Saved team_advanced_by_season.csv ({len(team_advanced):,} rows)")
print(f"Saved team_base_by_season.csv ({len(team_base):,} rows)")

In [None]:
# pip install nba_api pandas
import pandas as pd
import time
from pathlib import Path
from nba_api.stats.endpoints import leaguedashteamstats, leaguedashptstats
from nba_api.stats.static import teams as static_teams

# ===================== CONFIG =====================
START_SEASON, END_SEASON = 2013, 2024
# Season labels such as "2013-14": start year, dash, two-digit following year.
SEASONS = [f"{y}-{str((y+1)%100).zfill(2)}" for y in range(START_SEASON, END_SEASON + 1)]

# Run ALL measures listed here (each saved to its own CSV)
PT_MEASURES = [
    "SpeedDistance", "PullUpShot", "PostTouch",
    "Possessions", "Passing", "PaintTouch",
    "ElbowTouch", "Efficiency", "Drives",
    "CatchShoot",  # closing quote was missing, which made the whole cell a SyntaxError
]

PER_MODE = "PerGame"     # or "Totals"
HTTP_TIMEOUT = 60        # seconds per request
RETRIES = 3              # attempts per request before giving up
BASE_SLEEP = 0.8         # pause between requests; also the first retry delay
BACKOFF_MULTIPLIER = 2.0 # retry delay grows by this factor per attempt

# All per-measure CSVs are written under this directory (created if absent).
OUT_DIR = Path("nba_tracking_data")
OUT_DIR.mkdir(exist_ok=True)
# ==================================================

# ----- Static team lookup (TEAM_ID -> abbrev/name) -----
def build_team_lookup():
    """Map each static team id to a dict holding its abbreviation and full name."""
    lookup = {}
    for team in static_teams.get_teams():
        lookup[team["id"]] = {
            "abbreviation": team["abbreviation"],
            "full_name": team["full_name"],
        }
    return lookup

TEAM_LOOKUP = build_team_lookup()

# ----- Helpers -----
def get_teams_for_season(season: str) -> pd.DataFrame:
    """
    Return one row per team found in the season's LeagueDashTeamStats table.

    Columns are TEAM_ID (from the API) plus TEAM_ABBREVIATION and TEAM_NAME,
    both looked up in TEAM_LOOKUP; unknown ids get "" and "Team <id>".
    Any exception is printed and an empty frame with the same three columns
    is returned instead.
    """
    def abbreviation_of(team_id):
        return TEAM_LOOKUP.get(team_id, {}).get("abbreviation", "")

    def full_name_of(team_id):
        return TEAM_LOOKUP.get(team_id, {}).get("full_name", f"Team {team_id}")

    try:
        endpoint = leaguedashteamstats.LeagueDashTeamStats(
            season=season,
            season_type_all_star="Regular Season",
            per_mode_detailed="PerGame",
            measure_type_detailed_defense="Base",
            league_id_nullable="00",
            timeout=HTTP_TIMEOUT,
        )
        stats = endpoint.get_data_frames()[0]

        # Accept the spaced header spellings as well as the upper-case ones.
        aliases = {"Team ID": "TEAM_ID", "Team Name": "TEAM_NAME", "Team Abbreviation": "TEAM_ABBREVIATION"}
        found = {old: new for old, new in aliases.items() if old in stats.columns}
        stats.rename(columns=found, inplace=True)

        roster = stats[["TEAM_ID"]].drop_duplicates().reset_index(drop=True)
        roster["TEAM_ABBREVIATION"] = roster["TEAM_ID"].apply(abbreviation_of)
        roster["TEAM_NAME"] = roster["TEAM_ID"].apply(full_name_of)
        return roster
    except Exception as e:
        print(f"  ✗ Error getting teams for {season}: {e}")
        return pd.DataFrame(columns=["TEAM_ID", "TEAM_ABBREVIATION", "TEAM_NAME"])

def fetch_tracking_data(season: str, team_id: int, measure: str, attempt: int = 1) -> pd.DataFrame:
    """Request LeagueDashPtStats player rows for one team and season.

    A failed request is retried with a growing pause until the attempt
    counter reaches RETRIES; after that the error is printed and an empty
    frame is returned. An empty API result also yields an empty frame.
    """
    current = attempt
    while True:
        # Pause that would precede the next retry: BASE_SLEEP scaled per attempt.
        delay = BASE_SLEEP * (BACKOFF_MULTIPLIER ** (current - 1))
        try:
            frame = leaguedashptstats.LeagueDashPtStats(
                season=season,
                season_type_all_star="Regular Season",
                per_mode_simple=PER_MODE,
                player_or_team="Player",
                pt_measure_type=measure,
                team_id_nullable=team_id,     # key to split traded stints
                league_id_nullable="00",
                timeout=HTTP_TIMEOUT,
            ).get_data_frames()[0]
            if frame is None or frame.empty:
                return pd.DataFrame()
            return frame
        except Exception as e:
            if current >= RETRIES:
                print(f"    ✗ Failed after {RETRIES} attempts (season={season}, team_id={team_id}, measure={measure}): {e}")
                return pd.DataFrame()
            print(f"    ⚠ Attempt {current} failed (team {team_id}, {measure}), retrying in {delay:.1f}s...")
            time.sleep(delay)
            current += 1

def load_done_pairs(csv_path: Path) -> set:
    """Resume support per measure-file: set of (SEASON, TEAM_ID) already written.

    Args:
        csv_path: CSV previously written for one measure.

    Returns:
        A set of (season, team_id) tuples with both elements as strings.
        Empty when the file does not exist or cannot be read with the
        SEASON and TEAM_ID columns; in the latter case a warning is printed
        so a disabled resume is visible rather than silent.
    """
    if not csv_path.exists():
        return set()
    try:
        existing = pd.read_csv(csv_path, usecols=["SEASON", "TEAM_ID"])
        seasons = existing["SEASON"].astype(str)
        team_ids = existing["TEAM_ID"].astype(str)
        return set(zip(seasons, team_ids))
    except Exception as e:
        # Best effort: fall back to "nothing done yet", but say so.
        print(f"  ⚠ Could not read resume state from {csv_path} ({e}); treating it as empty")
        return set()

# ----- Main: loop all measures -----
for PT_MEASURE in PT_MEASURES:
    # One CSV per measure; pairs already on disk let an interrupted run resume.
    OUTPUT_CSV = OUT_DIR / f"tracking_{PT_MEASURE.lower()}.csv"
    done_pairs = load_done_pairs(OUTPUT_CSV)
    header_written = OUTPUT_CSV.exists()

    rule = '=' * 60
    print(f"\n{rule}")
    print(f"Collecting {PT_MEASURE} tracking data")
    print(f"Seasons: {SEASONS[0]} to {SEASONS[-1]} | Mode: {PER_MODE}")
    print(f"Output: {OUTPUT_CSV}")
    print(rule)

    total_rows_added = 0

    for season in SEASONS:
        print(f"\n{season}:")
        teams = get_teams_for_season(season)
        if teams.empty:
            print("  ✗ No teams found, skipping season")
            time.sleep(BASE_SLEEP)
            continue

        print(f"  Processing {len(teams)} teams...")
        season_rows = 0

        for _, team in teams.iterrows():
            tid = int(team["TEAM_ID"])
            tid_str = str(tid)
            tabbr = team.get("TEAM_ABBREVIATION", "")
            tname = team.get("TEAM_NAME", f"Team {tid_str}")

            # Already saved for this measure on an earlier run: nothing to do.
            pair = (season, tid_str)
            if pair in done_pairs:
                continue

            label = tabbr or tname
            df = fetch_tracking_data(season, tid, PT_MEASURE)

            if df.empty:
                print(f"    - {label}: no data")
                time.sleep(BASE_SLEEP)
                continue

            # Stamp stint metadata from the loop (do not trust API team fields for traded players)
            stamps = (
                ("SEASON", season),
                ("PT_MEASURE_TYPE", PT_MEASURE),
                ("TEAM_ID", tid),
                ("TEAM_ABBREVIATION", tabbr),
                ("TEAM_NAME", tname),
            )
            for column, value in stamps:
                df[column] = value

            # Append to the file; only the very first write carries the header.
            write_mode = "a" if header_written else "w"
            df.to_csv(OUTPUT_CSV, mode=write_mode, index=False, header=not header_written)
            header_written = True
            done_pairs.add(pair)

            written = len(df)
            season_rows += written
            total_rows_added += written

            print(f"    ✓ {label}: {written} players")
            time.sleep(BASE_SLEEP)

        print(f"  Season total: {season_rows} rows added")

    print(f"\n✅ Done {PT_MEASURE}: +{total_rows_added:,} rows → {OUTPUT_CSV}")

# ---------- Closing notes ----------
print("\nAll measures complete.")
print("Tip: UNION all tracking_*.csv files on (SEASON, TEAM_ID, PLAYER_ID, PT_MEASURE_TYPE) for analysis.")


In [None]:
# pip install nba_api pandas
import pandas as pd
import time
from pathlib import Path
from nba_api.stats.endpoints import shotchartdetail, leaguedashteamstats
from nba_api.stats.static import teams as static_teams

# ===================== CONFIG =====================
START_SEASON = 2013
END_SEASON = 2024
# Labels look like "2013-14": start year, dash, last two digits of the next year.
SEASONS = [
    f"{start}-{(start + 1) % 100:02d}"
    for start in range(START_SEASON, END_SEASON + 1)
]
SEASON_TYPE = "Regular Season"   # or "Playoffs"
HTTP_TIMEOUT = 60   # passed as timeout= to every request
BASE_SLEEP = 0.35   # pause between requests; also the first retry delay
RETRIES = 3         # attempts per request before giving up
OUT_PATH = Path("player_shooting_by_team_season__zones.csv")
# ==================================================

# Team lookup for stable names/abbrevs: team id -> {"abbreviation", "full_name"}
TEAM_LOOKUP = dict(
    (team["id"], {"abbreviation": team["abbreviation"], "full_name": team["full_name"]})
    for team in static_teams.get_teams()
)

def get_teams_for_season(season: str) -> pd.DataFrame:
    """Return one row per team found in the season's LeagueDashTeamStats table.

    Columns are TEAM_ID (from the API) plus TEAM_ABBREVIATION and TEAM_NAME,
    both looked up in TEAM_LOOKUP; unknown ids get "" and "Team <id>".

    The request is retried with a doubling pause until RETRIES attempts have
    been made. If every attempt fails, the error is printed and an empty
    frame with the same three columns is returned, so one bad request does
    not abort a run whose output is only saved after all seasons finish.
    """
    attempt = 1
    while True:
        try:
            df = leaguedashteamstats.LeagueDashTeamStats(
                season=season,
                season_type_all_star=SEASON_TYPE,
                per_mode_detailed="PerGame",
                measure_type_detailed_defense="Base",
                league_id_nullable="00",
                timeout=HTTP_TIMEOUT,
            ).get_data_frames()[0]
            break
        except Exception as e:
            if attempt >= RETRIES:
                print(f"  ✗ Error getting teams for {season}: {e}")
                return pd.DataFrame(columns=["TEAM_ID", "TEAM_ABBREVIATION", "TEAM_NAME"])
            # Same backoff shape as fetch_team_shots: BASE_SLEEP, then doubled.
            time.sleep(BASE_SLEEP * (2 ** (attempt - 1)))
            attempt += 1

    # Accept the spaced header spellings as well as the upper-case ones.
    ren = {"Team ID": "TEAM_ID", "Team Name": "TEAM_NAME", "Team Abbreviation": "TEAM_ABBREVIATION"}
    df.rename(columns={k: v for k, v in ren.items() if k in df.columns}, inplace=True)

    teams = df[["TEAM_ID"]].drop_duplicates().reset_index(drop=True)
    teams["TEAM_ABBREVIATION"] = teams["TEAM_ID"].apply(lambda tid: TEAM_LOOKUP.get(tid, {}).get("abbreviation", ""))
    teams["TEAM_NAME"] = teams["TEAM_ID"].apply(lambda tid: TEAM_LOOKUP.get(tid, {}).get("full_name", f"Team {tid}"))
    return teams

def fetch_team_shots(season: str, team_id: int, attempt: int = 1) -> pd.DataFrame:
    """Request ShotChartDetail rows (FGA context) for every player on one team.

    Args:
        season: Season label such as "2013-14".
        team_id: Team whose shots are requested; player_id=0 asks for all players.
        attempt: 1-based attempt counter used by the retry recursion.

    Returns:
        The first result frame, or an empty DataFrame when the API returns
        nothing or when the request still fails once attempt reaches RETRIES.
        The final failure is printed so that it is not mistaken for a team
        that simply has no shots.
    """
    try:
        shots = shotchartdetail.ShotChartDetail(
            team_id=team_id,
            player_id=0,  # all players on this team
            season_type_all_star=SEASON_TYPE,
            season_nullable=season,
            context_measure_simple="FGA",
            timeout=HTTP_TIMEOUT,
        ).get_data_frames()[0]
        return shots if isinstance(shots, pd.DataFrame) and not shots.empty else pd.DataFrame()
    except Exception as e:
        if attempt < RETRIES:
            # Doubling pause between attempts: BASE_SLEEP, 2x, 4x, ...
            time.sleep(BASE_SLEEP * (2 ** (attempt - 1)))
            return fetch_team_shots(season, team_id, attempt + 1)
        print(f"   ✗ Shot request failed (season={season}, team_id={team_id}, attempt={attempt}): {e}")
        return pd.DataFrame()

def aggregate_player_zone_rows(shots: pd.DataFrame, season: str, team_row: pd.Series) -> pd.DataFrame:
    """Collapse shot-level rows into one row per player and shot zone.

    Groups by player and the three zone columns, counting makes (sum of
    SHOT_MADE_FLAG) and attempts (count of SHOT_MADE_FLAG), and adds FG_PCT
    as makes divided by attempts. A zone column absent from shots is filled
    with "Unknown". The season and the team fields from team_row are stamped
    on every output row. The input frame is not modified.
    """
    player_cols = ["PLAYER_ID", "PLAYER_NAME"]
    zone_cols = ["SHOT_ZONE_BASIC", "SHOT_ZONE_AREA", "SHOT_ZONE_RANGE"]
    wanted = player_cols + ["SHOT_MADE_FLAG"] + zone_cols

    # Work on a trimmed copy holding only the columns that are present.
    trimmed = shots[[name for name in wanted if name in shots.columns]].copy()
    for zone in zone_cols:
        if zone not in trimmed.columns:
            trimmed[zone] = "Unknown"

    per_zone = trimmed.groupby(player_cols + zone_cols, as_index=False).agg(
        FGM=pd.NamedAgg(column="SHOT_MADE_FLAG", aggfunc="sum"),
        FGA=pd.NamedAgg(column="SHOT_MADE_FLAG", aggfunc="count"),
    )

    # Add the rate and the stint metadata in one step.
    per_zone = per_zone.assign(
        FG_PCT=per_zone["FGM"] / per_zone["FGA"],
        SEASON=season,
        TEAM_ID=int(team_row["TEAM_ID"]),
        TEAM_ABBREVIATION=team_row.get("TEAM_ABBREVIATION", ""),
        TEAM_NAME=team_row.get("TEAM_NAME", ""),
    )

    ordered = (
        ["SEASON", "TEAM_ID", "TEAM_ABBREVIATION", "TEAM_NAME"]
        + player_cols
        + zone_cols
        + ["FGM", "FGA", "FG_PCT"]
    )
    return per_zone[ordered]

# ------------------ RUN ------------------
all_chunks = []
print(f"Building zone-level player stints ({SEASON_TYPE}) → {SEASONS[0]}..{SEASONS[-1]}")

for s in SEASONS:
    print(f"\n{s}: fetching teams & shots…")
    teams = get_teams_for_season(s)
    print(f"  Teams: {len(teams)}")

    season_rows = []
    for _, trow in teams.iterrows():
        shots = fetch_team_shots(s, int(trow["TEAM_ID"]))
        label = trow['TEAM_ABBREVIATION'] or trow['TEAM_NAME']

        if shots.empty:
            print(f"   - {label}: no shots")
        else:
            chunk = aggregate_player_zone_rows(shots, s, trow)
            season_rows.append(chunk)
            print(f"   ✓ {label}: {len(chunk)} rows")

        # Pause after every team, whether or not it produced rows.
        time.sleep(BASE_SLEEP)

    if season_rows:
        all_chunks.append(pd.concat(season_rows, ignore_index=True))

# Stack all seasons; with nothing collected, still emit a header-only table.
if all_chunks:
    final = pd.concat(all_chunks, ignore_index=True)
else:
    final = pd.DataFrame(columns=[
        "SEASON", "TEAM_ID", "TEAM_ABBREVIATION", "TEAM_NAME",
        "PLAYER_ID", "PLAYER_NAME",
        "SHOT_ZONE_BASIC", "SHOT_ZONE_AREA", "SHOT_ZONE_RANGE",
        "FGM", "FGA", "FG_PCT",
    ])

final.to_csv(OUT_PATH, index=False)
print(f"\nSaved: {OUT_PATH} (rows={len(final):,}, cols={len(final.columns)})")


In [None]:
# pip install nba_api pandas
import pandas as pd
import time
from pathlib import Path
from nba_api.stats.endpoints import leaguedashplayerstats, commonplayerinfo

# ===================== CONFIG =====================
START_SEASON = 2013
END_SEASON = 2024
# Labels look like "2013-14": start year, dash, last two digits of the next year.
SEASONS = [
    f"{start}-{(start + 1) % 100:02d}"
    for start in range(START_SEASON, END_SEASON + 1)
]
SEASON_TYPE = "Regular Season"
HTTP_TIMEOUT = 60   # passed as timeout= to every request
BASE_SLEEP = 0.5    # pause between requests; also the first retry delay
RETRIES = 3         # attempts per request before giving up
OUT_PATH = Path("player_bio_common.csv")
BATCH_SIZE = 100  # Write every N players
# ==================================================

def get_player_ids_for_season(season: str) -> list[int]:
    """Return the distinct PLAYER_ID values in the season's LeagueDashPlayerStats table.

    Any exception is printed and an empty list is returned instead.
    """
    try:
        endpoint = leaguedashplayerstats.LeagueDashPlayerStats(
            season=season,
            season_type_all_star=SEASON_TYPE,
            per_mode_detailed="PerGame",
            measure_type_detailed_defense="Base",
            league_id_nullable="00",
            timeout=HTTP_TIMEOUT,
        )
        player_table = endpoint.get_data_frames()[0]
        distinct_ids = player_table["PLAYER_ID"].unique()
        return distinct_ids.tolist()
    except Exception as e:
        print(f"  ✗ Error getting player list for {season}: {e}")
        return []

def fetch_player_info(pid: int, attempt: int = 1) -> pd.DataFrame:
    """Request the first CommonPlayerInfo frame for one player id.

    A failed request is retried after a doubling pause until the attempt
    counter reaches RETRIES; the last error is then printed and an empty
    frame is returned. An empty API result also yields an empty frame.
    """
    current = attempt
    while True:
        try:
            endpoint = commonplayerinfo.CommonPlayerInfo(player_id=pid, timeout=HTTP_TIMEOUT)
            bio = endpoint.get_data_frames()[0]
            if bio.empty:
                return pd.DataFrame()
            return bio
        except Exception as e:
            if current >= RETRIES:
                print(f"    ✗ Failed for PLAYER_ID={pid}: {e}")
                return pd.DataFrame()
            time.sleep(BASE_SLEEP * (2 ** (current - 1)))
            current += 1

# Resume support: track which player IDs are already done
# An existing output file means its header is already in place and its
# PERSON_ID values name the players that need no new request.
already_on_disk = OUT_PATH.exists()
done_players = (
    set(pd.read_csv(OUT_PATH, usecols=["PERSON_ID"])["PERSON_ID"])
    if already_on_disk
    else set()
)
header_written = already_on_disk

print(f"Collecting unique player bios from {SEASONS[0]}–{SEASONS[-1]}")

# Step 1: collect every distinct player id across all seasons, so each
# player is requested once no matter how many seasons they appear in.
print("\nGathering unique players across all seasons...")
all_player_ids = set()

for s in SEASONS:
    ids = get_player_ids_for_season(s)
    if ids:
        print(f"  {s}: {len(ids)} players")
        all_player_ids.update(ids)
    time.sleep(BASE_SLEEP)

print(f"\nTotal unique players: {len(all_player_ids)}")

# Filter out already-completed players
players_to_fetch = all_player_ids - done_players
print(f"Players to fetch (skipping {len(done_players)} already done): {len(players_to_fetch)}")


def _flush_bio_batch(frames, header_already_written):
    """Write the buffered bio frames to OUT_PATH and return the row count.

    Appends when the header is already in the file; otherwise creates the
    file and writes the header first.
    """
    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv(
        OUT_PATH,
        mode=("a" if header_already_written else "w"),
        index=False,
        header=not header_already_written,
    )
    return len(combined)


# Step 2: fetch bios, writing to disk in batches to reduce I/O.
print("\nFetching player bios...")
batch_buffer = []
total_rows = 0
n_to_fetch = len(players_to_fetch)

for idx, pid in enumerate(players_to_fetch, start=1):
    df = fetch_player_info(pid)

    # A failed or empty lookup is left out of the output (and out of
    # done_players, so a later run retries it), but it still counts toward
    # progress and still pays the rate-limit pause below.
    if not df.empty:
        batch_buffer.append(df)
        done_players.add(pid)

    # Progress update
    if idx % 50 == 0:
        print(f"  Progress: {idx}/{n_to_fetch} ({100*idx/n_to_fetch:.1f}%)")

    # Batch write to reduce I/O
    if len(batch_buffer) >= BATCH_SIZE:
        written = _flush_bio_batch(batch_buffer, header_written)
        header_written = True
        total_rows += written
        print(f"    ✓ Wrote batch ({written} rows, {total_rows:,} total)")
        batch_buffer = []

    time.sleep(BASE_SLEEP)

# Write remaining records
if batch_buffer:
    written = _flush_bio_batch(batch_buffer, header_written)
    header_written = True
    total_rows += written
    print(f"    ✓ Wrote final batch ({written} rows, {total_rows:,} total)")

print(f"\n✅ Done. Saved: {OUT_PATH} ({total_rows:,} total rows)")
print(f"Unique players: {len(done_players)}")