In [1]:
import hvplot.pandas
import matplotlib as mpl
import pandas as pd

from nba.teams import team_colors

In [2]:
# Load the league-leaders CSV. Later cells read the PLAYER, PLAYER_ID, TEAM,
# SEASON, GP and PTS columns from this frame.
df = pd.read_csv('../data/all_time_league_leaders_2025.03.08.csv')

In [48]:
# Points summed per (player, team) pair, ordered from highest total to lowest.
all_time_scorers_single_team = (
    df.groupby(['PLAYER', 'PLAYER_ID', 'TEAM'])[['PTS']]
    .sum()
    .reset_index()
    .sort_values('PTS', ascending=False)
    .drop_duplicates(subset=['PLAYER', 'PLAYER_ID', 'TEAM'], keep='first')
)

# Rows are already sorted by PTS descending, so keeping the first row per
# player picks the team with which that player scored the most points.
player_team_map = (
    all_time_scorers_single_team
    .drop_duplicates(subset=['PLAYER', 'PLAYER_ID'], keep='first')
    .set_index('PLAYER')['TEAM']
)

In [53]:
# Show the player -> team lookup (a Series indexed by PLAYER).
player_team_map

PLAYER
Kobe Bryant        LAL
Dirk Nowitzki      DAL
Michael Jordan     CHI
Hakeem Olajuwon    HOU
Tim Duncan         SAS
                  ... 
Randell Jackson    DAL
Brandin Knight     HOU
Larry Sykes        BOS
Bobi Klintman      DET
Zyon Pullin        MEM
Name: TEAM, Length: 4618, dtype: object

In [54]:
# Career point totals for the ten highest scorers, sorted ascending by PTS,
# with the PLAYER_ID index level dropped so the index is the player name.
all_time_scorers = (
    df.groupby(['PLAYER', 'PLAYER_ID'])[['PTS']]
    .sum()
    .nlargest(10, columns=['PTS'])
    .sort_values(by='PTS')
    .droplevel(1)
)

# Build a plain name -> team dict for just these players, then use it to
# attach the TEAM column that drives the bar colours.
top_scorer_names = all_time_scorers.index.get_level_values(0)
top_scorer_teams = player_team_map.loc[top_scorer_names].to_dict()
all_time_scorers['TEAM'] = all_time_scorers.index.map(top_scorer_teams)

all_time_scorers.hvplot.barh(
    color='TEAM',
    cmap=team_colors,
    legend=False,
    title='Top 10 All-Time NBA Scoring Leaders',
    xlabel='',
    ylabel='All-Time Points Scored',
    fontsize=dict(title=20, labels=16, ticks=12),
    width=800,
    height=600,
)

In [63]:
# Ten largest (player, team) point totals, sorted ascending by PTS, drawn as
# horizontal bars coloured by team.
(
    all_time_scorers_single_team
    .nlargest(10, columns='PTS')
    .drop(columns=['PLAYER_ID'])
    .sort_values('PTS')
    .hvplot.barh(
        x='PLAYER',
        y='PTS',
        color='TEAM',
        cmap=team_colors,
        legend=False,
        title='Top 10 All-Time NBA Scoring Leaders For Single Team',
        xlabel='',
        ylabel='All-Time Points Scored',
        fontsize=dict(title=20, labels=16, ticks=12),
        width=800,
        height=600,
    )
)

In [3]:
# Add a 'color' column by mapping each row's TEAM through team_colors
# (imported from nba.teams).
df['color'] = df['TEAM'].map(team_colors)

In [4]:
# Largest GP value recorded in each SEASON (Series indexed by SEASON). Used
# below as the per-season denominator when scaling to an 82-game season.
season_max_gp = df.groupby('SEASON')['GP'].max()

In [5]:
def normalize_season_lengths(row):
    """Scale a row's PTS as if its season had been 82 games long.

    The season length is taken to be the largest GP value recorded for the
    row's SEASON, looked up in the module-level ``season_max_gp`` Series.

    Args:
        row: Mapping-like record providing ``'PTS'`` and ``'SEASON'``.

    Returns:
        ``PTS * (82 / season_max_gp[SEASON])``.
    """
    season_scale = 82 / season_max_gp[row['SEASON']]
    return row['PTS'] * season_scale

def normalize_games_played(row):
    """Project a row's PTS onto 82 games at the player's own scoring rate.

    Args:
        row: Mapping-like record providing ``'PTS'`` and ``'GP'``.

    Returns:
        ``PTS * (82 / GP)``.
    """
    per_player_scale = 82 / row['GP']
    return row['PTS'] * per_player_scale

In [6]:
# Points per game.
df['PPG'] = df['PTS'] / df['GP']

# Season length for every row: the largest GP recorded in that row's SEASON.
season_length = df['SEASON'].map(season_max_gp)

# Vectorised column arithmetic replaces the row-wise DataFrame.apply(axis=1);
# the operations (PTS * (82 / denominator), then truncation to int) are the
# same, so the resulting values are unchanged.
# PTS_season_norm: points rescaled as if the season had been 82 games long.
df['PTS_season_norm'] = (df['PTS'] * (82 / season_length)).astype(int)
# PTS_game_norm: points projected to 82 games at the player's own rate.
df['PTS_game_norm'] = (df['PTS'] * (82 / df['GP'])).astype(int)

In [7]:
def _top_scorers_per_season(frame, metric, n=10):
    """Return a copy of the ``n`` rows with the highest ``metric`` in each SEASON."""
    ranked = frame.sort_values(['SEASON', metric], ascending=[True, False])
    return ranked.groupby('SEASON').head(n).copy()


# Rows allowed into the per-game ranking.
# NOTE(review): GP >= 58 with a carve-out for SEASON == 1998 looks like a
# minimum-games rule plus an exception for one season - confirm the intent.
ppg_eligible = df[(df['GP'] >= 58) | (df['SEASON'] == 1998)]

top_10_total = _top_scorers_per_season(df, 'PTS')
top_10_season_norm = _top_scorers_per_season(df, 'PTS_season_norm')
top_10_game_norm = _top_scorers_per_season(df, 'PTS_game_norm')
top_10_avg = _top_scorers_per_season(ppg_eligible, 'PPG')

In [8]:
def _season_leader(frame, metric):
    """Return the single highest-``metric`` row of each SEASON, indexed by SEASON."""
    ranked = frame.sort_values(['SEASON', metric], ascending=[True, False])
    return ranked.groupby('SEASON').head(1).copy().set_index('SEASON')


# Rows allowed to win the per-game title.
# NOTE(review): GP >= 58 with a carve-out for SEASON == 1998 looks like a
# minimum-games rule plus an exception for one season - confirm the intent.
title_eligible = df[(df['GP'] >= 58) | (df['SEASON'] == 1998)]

scoring_title_total = _season_leader(df, 'PTS')
scoring_title_season_norm = _season_leader(df, 'PTS_season_norm')
scoring_title_game_norm = _season_leader(df, 'PTS_game_norm')
scoring_title_avg = _season_leader(title_eligible, 'PPG')

In [10]:
# PPG of each season's per-game scoring leader: a black trend line with
# team-coloured markers overlaid on top of it.
params = dict(x="SEASON", y="PPG", size=50)
hover_cols = ["PLAYER"]
(
    scoring_title_avg.hvplot.line(**params, color="black", hover=False)
    * scoring_title_avg.hvplot.scatter(
        **params,
        color="TEAM",
        cmap=team_colors,
        hover_cols=hover_cols,
        legend=False,
    )
)

In [11]:
# Total points of each season's total-points leader: a black trend line with
# team-coloured markers overlaid on top of it.
params = dict(x="SEASON", y="PTS", size=50)
hover_cols = ["PLAYER"]
(
    scoring_title_total.hvplot.line(**params, color="black", hover=False)
    * scoring_title_total.hvplot.scatter(
        **params,
        color="TEAM",
        cmap=team_colors,
        hover_cols=hover_cols,
        legend=False,
    )
)

In [12]:
# Season-length-normalised points of each season's leader: a black trend line
# with team-coloured markers overlaid on top of it.
#
# The data now comes from scoring_title_season_norm, the frame ranked by the
# plotted metric. The cell previously read scoring_title_total (ranked by raw
# PTS), which left scoring_title_season_norm unused.
hover_cols = ["PLAYER"]
params = {
    "x": "SEASON",
    "y": "PTS_season_norm",
    "size": 50,
}
(
    scoring_title_season_norm.hvplot.line(color="black", hover=False, **params)
    * scoring_title_season_norm.hvplot.scatter(color="TEAM", legend=False, cmap=team_colors, hover_cols=hover_cols, **params)
)

In [14]:
# Number of seasons each player led the league in total points, most first.
player_count_scoring_title_total = (
    scoring_title_total
    .groupby('PLAYER')['PLAYER']
    .count()
    .sort_values(ascending=False)
)

In [15]:
# Number of seasons each player led the league in points per game, most first.
player_count_scoring_title_avg = (
    scoring_title_avg
    .groupby('PLAYER')['PLAYER']
    .count()
    .sort_values(ascending=False)
)

In [26]:
def highlight_rows(row):
    """Return one CSS string per cell, tinting rows where the two counts differ.

    Args:
        row: Mapping-like record providing ``'Total'`` and ``'Avg'``.

    Returns:
        A list of ``len(row)`` identical CSS strings: translucent green when
        ``Total`` exceeds ``Avg``, translucent red when ``Avg`` exceeds
        ``Total``, and the empty string otherwise.
    """
    total, avg = row['Total'], row['Avg']
    if total > avg:
        css = 'background-color: rgba(0, 255, 0, 0.3)'
    elif avg > total:
        css = 'background-color: rgba(255, 0, 0, 0.3)'
    else:
        css = ''
    return [css] * len(row)

# Put the two title counts side by side as 'Total' and 'Avg' columns, order by
# Total, and treat a player missing from one count as having zero titles there.
styled_df = (
    pd.concat(
        [player_count_scoring_title_total, player_count_scoring_title_avg],
        keys=['Total', 'Avg'],
        axis=1,
    )
    .sort_values('Total', ascending=False)
    .fillna(0)
    .astype(int)
    .style.apply(highlight_rows, axis=1)
)

styled_df

Unnamed: 0_level_0,Total,Avg
PLAYER,Unnamed: 1_level_1,Unnamed: 2_level_1
Michael Jordan,11,10
Wilt Chamberlain,7,7
Kevin Durant,5,4
Kobe Bryant,4,2
George Gervin,4,4
James Harden,4,3
Kareem Abdul-Jabbar,3,2
Shaquille O'Neal,3,2
Bob McAdoo,3,3
Neil Johnston,3,3


In [33]:
# Seasons in which the total-points leader and the PPG leader are different
# players. `disagreement` holds two rows per such season: the PPG leader's
# row followed by the total-points leader's row.
disagree = scoring_title_total['PLAYER'] != scoring_title_avg['PLAYER']
disagreement = pd.concat([scoring_title_avg[disagree], scoring_title_total[disagree]])

# One bar chart per disputed season, with bars coloured by PPG.
plots = [
    disagreement.loc[season]
    .sort_values('PTS_season_norm')
    .hvplot.barh(
        x='SEASON',
        y='PTS_season_norm',
        by='PLAYER',
        color="PPG",
        cmap="RdYlGn",
        colorbar=True,
        clabel="PPG",
        hover_cols=["PPG"],
        size=50,
        title="Disagreement between total points and points per game scoring champions",
    )
    for season in disagreement.index.unique()
]

# Overlay every season's chart onto the first one to get a single figure.
p = plots[0]
for plot in plots[1:]:
    p *= plot
p.opts(width=800, height=800, ylim=(1750, 2500))


In [152]:
# Games played by each PPG champion as a percentage of that season's maximum
# GP. Both operands are indexed by SEASON, so the division aligns on it.
scoring_title_avg["PCT_GP"] = 100 * scoring_title_avg['GP'] / season_max_gp
(
    scoring_title_avg.hvplot.line(
        x='SEASON',
        y='PCT_GP',
        hover_cols=['PLAYER'],
    ).opts(
        title="Percentage of games played by scoring champion",
        xlabel='Season',
        ylabel='Percentage of games played',
    )
    * scoring_title_avg.hvplot.scatter(
        x='SEASON',
        y='PCT_GP',
        size=50,
        hover_cols=['PLAYER'],
    )
)

In [170]:
# Mean games played by each season's top-10 PPG scorers, drawn as a line with
# markers, against the season's maximum games played (dashed black line).
avg_gp_top_10 = top_10_avg.groupby('SEASON')['GP'].mean()
(
    avg_gp_top_10.hvplot.line(label="Average games played by top 10 scorers")
    * avg_gp_top_10.hvplot.scatter(label="Average games played by top 10 scorers", size=50)
    * season_max_gp.hvplot.line(
        label="Maximum games played in the season",
        color="black",
        line_dash="dashed",
    )
).opts(legend_position='top')

In [132]:
# Mean games played by the top-10 PPG scorers, as a percentage of the season's
# maximum games played.
(
    100 * top_10_avg.groupby('SEASON')['GP'].mean() / season_max_gp
).hvplot.line().opts(
    title="Average percentage of games played by top 10 scorers",
    xlabel='Season',
    ylabel='Percentage of games played',
)

In [36]:
# For each PPG champion, shade the band between actual points (PTS) and the
# 82-game projection (PTS_game_norm), with markers on both edges of the band.
hover_cols = ["PLAYER", "PTS", "PPG", "PTS_game_norm"]
(
    scoring_title_avg.hvplot.area(
        x='SEASON',
        y='PTS',
        y2='PTS_game_norm',
        color="teal",
        alpha=0.5,
        hover=False,
    )
    * scoring_title_avg.hvplot.scatter(
        x='SEASON',
        y='PTS',
        color="teal",
        size=10,
        hover_cols=hover_cols,
    )
    * scoring_title_avg.hvplot.scatter(
        x='SEASON',
        y='PTS_game_norm',
        color="teal",
        size=10,
        hover_cols=hover_cols,
    )
).opts(
    legend_position='top',
    title="Scoring champion points scored vs. points scored normalized to 82 games",
)