In [1]:
import time

import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm

In [2]:
# Download the career points leaders page and parse it for the next cell.
url = "https://www.basketball-reference.com/leaders/pts_career.html"
# A timeout keeps the cell from hanging forever on a stalled connection.
res = requests.get(url, timeout=30)
# Stop here on an HTTP error status instead of parsing an error page as data.
res.raise_for_status()
soup = BeautifulSoup(res.text,"html.parser")

In [3]:
# Turn every data row of the "tot" table into [rank, name, href, isActive].
leader_table = soup.select("table#tot")[0]

l_players = []
for row in leader_table.select("tr"):
    cells = row.select("td")
    if not cells:
        # Rows without <td> cells carry no player entry.
        continue

    rank_cell, name_cell = cells[0], cells[1]
    l_players.append([
        rank_cell.text,
        name_cell.text.strip().replace("*",""),
        name_cell.a.get("href"),
        # A <strong> element inside the row marks an active player.
        int(bool(row.select("strong"))),
    ])

df_players = pd.DataFrame(l_players,columns=["rank","name","href","isActive"])

In [4]:
# Collect one DataFrame of (name, season, pts) rows per player for the first
# eight active players in df_players.
l_df_season = []
df_players_active = df_players.query('isActive==1').head(8)
for _,name,href,_ in tqdm(df_players_active.values,total=len(df_players_active)):
    time.sleep(0.5)  # throttle: wait half a second before each request

    # Use player-specific names so the leaderboard `url`/`res`/`soup` from the
    # earlier cell are not overwritten (re-running that parsing cell would
    # otherwise operate on the last player's page).
    player_url = "https://www.basketball-reference.com" + href
    player_res = requests.get(player_url, timeout=30)
    player_res.raise_for_status()
    player_soup = BeautifulSoup(player_res.text,"html.parser")

    totals_tables = player_soup.select("table#totals")
    if not totals_tables:
        raise IndexError(f"no table#totals found for {name} at {player_url}")

    season_rows = []
    for row in totals_tables[0].select("tbody > tr"):
        season_cells = row.select("th")
        pts_cells = row.select("[data-stat='pts']")
        if not season_cells or not pts_cells:
            # Row has no season header or no points cell: nothing to record.
            continue

        points = pts_cells[0].text
        if not points.strip():
            # Blank points cannot be converted to an integer downstream.
            continue

        season_rows.append([name, season_cells[0].text, points])

    l_df_season.append(pd.DataFrame(season_rows,columns=["name","season","pts"]))

100%|██████████| 8/8 [00:09<00:00,  1.13s/it]


In [5]:
# Stack the per-player frames into one table with a fresh 0..n-1 index.
_df_season = pd.concat(l_df_season, ignore_index=True)

# Drop rows whose points text is blank before converting; any other
# non-numeric text still raises so malformed data is not hidden.
_has_points = _df_season["pts"].astype(str).str.strip().ne("")
_df_season = _df_season.loc[_has_points].assign(pts=lambda d: d["pts"].astype(int))

# One row per (player, season).
# NOTE(review): if the scraped table lists both a combined row and per-team
# rows for a season in which a player changed teams, this sum counts that
# season twice - confirm against the page markup.
df_season = _df_season.groupby(["name","season"],as_index=False)["pts"].sum()

# Running career total and 1-based season counter per player, following the
# (name, season) order produced by the groupby above.
df_season["pts_cumsum"] = df_season.groupby(["name"])["pts"].cumsum()
df_season["season_year"] = df_season.groupby(["name"]).cumcount()+1

# Add a (season_year=0, pts_cumsum=0) row per player so every curve starts at the origin.
_origin = pd.DataFrame({"name":df_season["name"].drop_duplicates(),"pts_cumsum":0,"season_year":0})
df_season = pd.concat([df_season, _origin], ignore_index=True)


In [6]:
# Reshape to wide form: rows are season_year, columns are player names and
# cells hold pts_cumsum (aggregated with pivot_table's default, the mean).
df_season_pivot = pd.pivot_table(
    df_season,
    index="season_year",
    columns="name",
    values="pts_cumsum",
)

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
from matplotlib.animation import ArtistAnimation
from IPython import display

# Per-player drawing settings: line color, then the offsets used for the
# label's leader line (legendx/legendy: diagonal segment from the last data
# point, legendh: length of the horizontal segment that follows it).
df_setting = pd.DataFrame([
    ["Chris Paul",       "grey", 0.5,-1000,0.5],
    ["DeMar DeRozan",    "grey", 0.5,  500,0.5],
    ["Stephen Curry",    "grey", 0.5, 2000,0.5],
    ["Russell Westbrook","grey", 0.5,  500,0.5],
    ["Kevin Durant",     "grey", 0.0,    0,1.0],
    ["James Harden",     "grey", 1.0, 1500,3.0],
    ["Carmelo Anthony",  "grey", 0.5,  500,0.5],
    ["LeBron James",     "orange",0.5,  500,0.5],
    ],columns=["name","color","legendx","legendy","legendh"])

# The player list above is hard-coded while the data is scraped live, so fail
# early with the full list of absent names rather than midway through drawing.
missing_players = [n for n in df_setting["name"] if n not in df_season_pivot.columns]
if missing_players:
    raise KeyError(f"players missing from df_season_pivot: {missing_players}")

# The style must be active BEFORE the figure is created: a figure takes its
# face color at creation time, so applying the style afterwards leaves a
# fresh-kernel run with a default-colored figure and dark-theme text.
plt.style.use('dark_background')
plt.rcParams['font.size'] = 15

fig = plt.figure(figsize=(10,10))
plt.subplots_adjust(left=0.2,right=0.8,top=0.9)
plt.xlim(0, 21)
plt.ylim(0, 42000)
plt.xlabel("Career Seasons")
plt.ylabel("Total Points Scored")
plt.xticks(ticks=range(21), labels=range(21))
plt.yticks(ticks=[i*10000 for i in range(4+1)], labels=[f"{i*10000:,}" for i in range(4+1)])
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['top'].set_visible(False)
plt.title("NBA - Career Seasons and Total Points Scored\n(Only active players scored 20,000+, as of 2023-02-08)")

# Reference grid. axvline's ymin/ymax and axhline's xmin/xmax are fractions of
# the axes (0-1), not data values, so the defaults (full span) are used here.
for _x in [5,10,15,20]:
    plt.axvline(x=_x,color="grey",linewidth=1)
for _y in [1,2,3,4]:
    plt.axhline(y=_y*10000,color="grey",linewidth=1)

# organize data
# Each entry of `frames` is the list of artists visible in that animation frame.
frames = []
# Artists of players whose curve is already complete; shown in every later frame.
finished_artists = []
for name,color,legendx,legendy,legendh in df_setting.values:

    cumulative = df_season_pivot[name].dropna()
    last_step = len(cumulative)-1

    for i in range(len(cumulative)):

        # Curve of the current player up to and including step i.
        partial_line = plt.plot(cumulative.head(i+1),linestyle="-",color=color,linewidth=2,marker="o",markersize=4,markerfacecolor=color,markeredgecolor=color)

        if i < last_step:
            frames.append(finished_artists+partial_line)
            continue

        # Last step: add the leader line and the label, then keep all of this
        # player's artists for the remaining frames.
        x,y = cumulative.index.max(),cumulative.max()
        leader_diag = plt.plot([x, x+legendx], [y, y+legendy],color=color,linewidth=1)
        leader_flat = plt.plot([x+legendx,x+legendx+legendh], [y+legendy, y+legendy],color=color,linewidth=1)
        if name == "LeBron James":
            # The highlighted player also gets the point total under the name.
            label = plt.text(x+legendx+legendh+0.5,y+legendy,f" \n{name}\n{y:,.0f}",color=color,fontsize=15)
        else:
            label = plt.text(x+legendx+legendh+0.5,y+legendy,name,color=color,fontsize=15)

        finished_artists = finished_artists + partial_line + leader_diag + leader_flat + [label]
        frames.append(finished_artists)

# Repeat the completed picture for 30 extra frames at the end of the animation.
frames.extend([finished_artists]*30)

# draw
ani = ArtistAnimation(fig, frames, interval=60)
output_file = '../output/20230209_NBA_Points_Ranking.gif'
# Create the target directory if needed so saving does not fail on a fresh checkout.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
ani.save(output_file)
# To preview inline instead of (or in addition to) saving:
# html = display.HTML(ani.to_jshtml())
# display.display(html)
# Close this specific figure rather than whichever one happens to be current.
plt.close(fig)