In [1]:
import pandas as pd
import streamlit as st
import pandas as pd
import altair as alt
from nba_api.stats.endpoints import LeagueDashPlayerStats
import streamlit.components.v1 as components

In [2]:
def load_data(season, season_type="Regular Season"):
    """Download league-wide player stats for one season.

    Parameters
    ----------
    season : str
        Season label forwarded to ``LeagueDashPlayerStats``, e.g. ``"2023-24"``.
    season_type : str, optional
        Value forwarded as ``season_type_all_star``. Defaults to
        ``"Regular Season"``, which is what this function always requested
        before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        The first frame returned by the endpoint, without the ``NICKNAME``,
        ``WNBA_FANTASY_PTS`` and ``WNBA_FANTASY_PTS_RANK`` columns.
    """
    endpoint = LeagueDashPlayerStats(season=season, season_type_all_star=season_type)

    # get_data_frames() returns a list of frames; only the first one is used here.
    players_df = endpoint.get_data_frames()[0]

    # Dropping these columns is pure cleanup, so a response that does not carry
    # one of them should not abort the whole download with a KeyError.
    unwanted_columns = ["NICKNAME", "WNBA_FANTASY_PTS", "WNBA_FANTASY_PTS_RANK"]
    return players_df.drop(columns=unwanted_columns, errors="ignore")

def prepare_dataset(data, min_three_point_attempts=150, min_field_goal_attempts=150):
    """Turn a raw player-stats frame into the display-ready stats table.

    The input frame is left untouched; a new frame is returned.

    Steps, in order:
      1. add an ``IMAGE`` column holding a headshot URL built from ``PLAYER_ID``;
      2. drop identifier, rank and other unused columns;
      3. rename the remaining stat columns to readable labels;
      4. add ``Total Games`` (wins + losses) and per-game averages rounded to
         one decimal;
      5. multiply the two shooting-percentage columns by 100 and blank them
         (NaN) for players at or below the attempt thresholds;
      6. index the result by player name.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw stats frame. It must contain ``PLAYER_ID``, ``PLAYER_NAME`` and
        every column named in the drop/rename tables below; a missing column
        raises ``KeyError``.
    min_three_point_attempts : int, optional
        ``Three-Pointers %`` is kept only for rows with strictly more
        three-point attempts than this. Defaults to 150.
    min_field_goal_attempts : int, optional
        ``Field Goal %`` is kept only for rows with strictly more field-goal
        attempts than this. Defaults to 150.

    Returns
    -------
    pandas.DataFrame
        Prepared stats indexed by ``Name``.
    """
    headshot_url = "https://cdn.nba.com/headshots/nba/latest/1040x760/{}.png"

    # Columns that are not shown in the final table.
    columns_to_drop = [
        "PLAYER_ID", "TEAM_ID", "TEAM_ABBREVIATION", "AGE",
        "GP_RANK", "W_RANK", "L_RANK", "W_PCT_RANK", "MIN_RANK", "FGM_RANK",
        "FGA_RANK", "FG_PCT_RANK", "FG3M_RANK", "FG3A_RANK", "FG3_PCT_RANK",
        "FTM_RANK", "FTA_RANK", "FT_PCT_RANK", "OREB_RANK", "DREB_RANK",
        "REB_RANK", "AST_RANK", "TOV_RANK", "STL_RANK", "BLK_RANK", "BLKA_RANK",
        "PF_RANK", "PFD_RANK", "PTS_RANK", "PLUS_MINUS_RANK",
        "NBA_FANTASY_PTS_RANK", "DD2_RANK", "TD3_RANK", "GP", "W_PCT"
    ]

    # Readable labels for the remaining stat columns.
    rename_dict = {
        "PLAYER_NAME": "Name",
        "W": "Wins",
        "L": "Losses",
        "MIN": "Minutes Played",
        "FGM": "Field Goals Made",
        "FGA": "Field Goals Attempted",
        "FG_PCT": "Field Goal %",
        "FG3M": "Three-Pointers Made",
        "FG3A": "Three-Pointers Attempted",
        "FG3_PCT": "Three-Pointers %",
        "FTM": "Free Throws Made",
        "FTA": "Free Throws Attempted",
        "FT_PCT": "Free Throw %",
        "OREB": "Offensive Rebounds",
        "DREB": "Defensive Rebounds",
        "REB": "Total Rebounds",
        "AST": "Assists",
        "TOV": "Turnovers",
        "STL": "Steals",
        "BLK": "Blocks",
        "BLKA": "Blocked Field Goal Attempts",
        "PF": "Personal Fouls",
        "PFD": "Personal Fouls Drawn",
        "PTS": "Points Scored",
        "PLUS_MINUS": "Plus-Minus Rating",
        "NBA_FANTASY_PTS": "NBA Fantasy Points",
        "DD2": "Double-Doubles",
        "TD3": "Triple-Doubles"
    }

    # New per-game column -> season-total column it is derived from.
    # Insertion order is the column order of the output.
    per_game_columns = {
        "Minutes / Game": "Minutes Played",
        "FG Made / Game": "Field Goals Made",
        "FG Attempted / Game": "Field Goals Attempted",
        "3PTs Made / Game": "Three-Pointers Made",
        "3PTs Attempted / Game": "Three-Pointers Attempted",
        "FTs Made / Game": "Free Throws Made",
        "FTs Attempted / Game": "Free Throws Attempted",
        "Offensive Rebounds / Game": "Offensive Rebounds",
        "Defensive Rebounds / Game": "Defensive Rebounds",
        "Rebounds / Game": "Total Rebounds",
        "Assists / Game": "Assists",
        "Turnovers / Game": "Turnovers",
        "Steals / Game": "Steals",
        "Blocks / Game": "Blocks",
        "Blocked FG Attempts / Game": "Blocked Field Goal Attempts",
        "Personal Fouls / Game": "Personal Fouls",
        "Personal Fouls Drawn / Game": "Personal Fouls Drawn",
        "Points / Game": "Points Scored",
        "Plus-Minus / Game": "Plus-Minus Rating",
    }

    # assign() builds a new frame, so the caller's DataFrame never gains an
    # IMAGE column as a side effect.
    stats = (
        data.assign(IMAGE=data["PLAYER_ID"].apply(lambda pid: headshot_url.format(pid)))
        .drop(columns=columns_to_drop)
        .rename(columns=rename_dict)
    )

    # Games played is recomputed from the win/loss record (GP was dropped above).
    stats["Total Games"] = stats["Wins"] + stats["Losses"]
    for per_game_name, total_name in per_game_columns.items():
        stats[per_game_name] = (stats[total_name] / stats["Total Games"]).round(1)

    # Blank out shooting percentages for low-volume shooters. where() masks
    # row by row, so the result does not depend on index re-alignment and
    # stays correct even when the incoming index has duplicate labels.
    enough_threes = stats["Three-Pointers Attempted"] > min_three_point_attempts
    stats["Three-Pointers %"] = stats["Three-Pointers %"].where(enough_threes) * 100

    enough_field_goals = stats["Field Goals Attempted"] > min_field_goal_attempts
    stats["Field Goal %"] = stats["Field Goal %"].where(enough_field_goals) * 100

    return stats.set_index("Name")

import time


def _fetch_season(season):
    """Wait one second, then download and prepare the stats for ``season``."""
    # The pause precedes every request, exactly as in the unrolled version.
    time.sleep(1)
    return prepare_dataset(load_data(season))


# One prepared frame per season, 1996-97 through 2024-25, fetched in
# chronological order. The variable names are referenced by later cells.
a1996_1997 = _fetch_season("1996-97")
a1997_1998 = _fetch_season("1997-98")
a1998_1999 = _fetch_season("1998-99")
a1999_2000 = _fetch_season("1999-00")
a2000_2001 = _fetch_season("2000-01")
a2001_2002 = _fetch_season("2001-02")
a2002_2003 = _fetch_season("2002-03")
a2003_2004 = _fetch_season("2003-04")
a2004_2005 = _fetch_season("2004-05")
a2005_2006 = _fetch_season("2005-06")
a2006_2007 = _fetch_season("2006-07")
a2007_2008 = _fetch_season("2007-08")
a2008_2009 = _fetch_season("2008-09")
a2009_2010 = _fetch_season("2009-10")
a2010_2011 = _fetch_season("2010-11")
a2011_2012 = _fetch_season("2011-12")
a2012_2013 = _fetch_season("2012-13")
a2013_2014 = _fetch_season("2013-14")
a2014_2015 = _fetch_season("2014-15")
a2015_2016 = _fetch_season("2015-16")
a2016_2017 = _fetch_season("2016-17")
a2017_2018 = _fetch_season("2017-18")
a2018_2019 = _fetch_season("2018-19")
a2019_2020 = _fetch_season("2019-20")
a2020_2021 = _fetch_season("2020-21")
a2021_2022 = _fetch_season("2021-22")
a2022_2023 = _fetch_season("2022-23")
a2023_2024 = _fetch_season("2023-24")
a2024_2025 = _fetch_season("2024-25")

In [3]:
# Season label -> prepared frame, in chronological order. The order of this
# mapping is the row order of the combined table.
frames_by_season = {
    "1996-97": a1996_1997,
    "1997-98": a1997_1998,
    "1998-99": a1998_1999,
    "1999-00": a1999_2000,
    "2000-01": a2000_2001,
    "2001-02": a2001_2002,
    "2002-03": a2002_2003,
    "2003-04": a2003_2004,
    "2004-05": a2004_2005,
    "2005-06": a2005_2006,
    "2006-07": a2006_2007,
    "2007-08": a2007_2008,
    "2008-09": a2008_2009,
    "2009-10": a2009_2010,
    "2010-11": a2010_2011,
    "2011-12": a2011_2012,
    "2012-13": a2012_2013,
    "2013-14": a2013_2014,
    "2014-15": a2014_2015,
    "2015-16": a2015_2016,
    "2016-17": a2016_2017,
    "2017-18": a2017_2018,
    "2018-19": a2018_2019,
    "2019-20": a2019_2020,
    "2020-21": a2020_2021,
    "2021-22": a2021_2022,
    "2022-23": a2022_2023,
    "2023-24": a2023_2024,
    "2024-25": a2024_2025,
}

# Tag every frame with its season, then stack them into a single DataFrame.
combined_df = pd.concat(
    [frame.assign(Season=label) for label, frame in frames_by_season.items()]
)

# Persist the combined table; index=True writes the frame's index as the
# first CSV column.
combined_df.to_csv("combined_nba_stats.csv", index=True)

In [4]:
# Reload the combined stats from the CSV file written by the previous cell.
df = pd.read_csv("combined_nba_stats.csv")
# Bare last expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,Name,Wins,Losses,Minutes Played,Field Goals Made,Field Goals Attempted,Field Goal %,Three-Pointers Made,Three-Pointers Attempted,Three-Pointers %,...,Assists / Game,Turnovers / Game,Steals / Game,Blocks / Game,Blocked FG Attempts / Game,Personal Fouls / Game,Personal Fouls Drawn / Game,Points / Game,Plus-Minus / Game,Season
0,A.C. Green,23,60,2494.298333,234,484,48.3,1,20,,...,0.8,0.9,0.8,0.2,0.5,1.7,0.0,7.2,-4.2,1996-97
1,Aaron McKie,48,35,1623.911667,150,365,41.1,41,103,,...,1.9,1.1,0.9,0.3,0.4,1.6,0.0,5.2,1.6,1996-97
2,Aaron Williams,4,29,562.423333,85,148,,0,1,,...,0.5,1.0,0.5,0.9,0.2,2.2,0.0,6.2,-3.2,1996-97
3,Acie Earl,14,33,500.141667,67,179,37.4,0,5,,...,0.4,0.7,0.3,0.6,0.2,1.3,0.0,4.0,-1.3,1996-97
4,Adam Keefe,48,14,916.788333,82,160,51.3,0,1,,...,0.5,0.7,0.5,0.2,0.2,1.6,0.0,3.8,2.0,1996-97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13976,Zach LaVine,28,38,2299.196667,548,1073,51.1,202,466,43.3,...,4.2,2.9,0.8,0.2,0.6,1.5,3.1,22.9,-0.8,2024-25
13977,Zeke Nnaji,35,18,560.928333,68,134,,16,48,,...,0.4,0.2,0.4,0.7,0.2,1.1,0.7,3.3,-0.1,2024-25
13978,Ziaire Williams,20,39,1460.136667,201,495,40.6,97,287,33.8,...,1.4,1.1,1.0,0.4,0.6,2.4,1.8,10.0,-5.5,2024-25
13979,Zion Williamson,10,20,857.276667,288,508,56.7,3,13,,...,5.3,3.0,1.2,0.9,2.0,2.7,5.7,24.6,-1.4,2024-25
