In [1]:
from bs4 import BeautifulSoup
from bs4 import Comment
from ratelimiter import RateLimiter
import numpy as np
import pandas as pd

import requests
import string


In [2]:
# Index page that links to every playoff series.
home_url = 'https://www.basketball-reference.com/playoffs/series.html'

# Link texts on the index page that identify a series worth scraping.
series_names = [
    'Eastern Conf First Round',
    'Eastern Conf Semifinals',
    'Eastern Conf Finals',
    'Western Conf First Round',
    'Western Conf Semifinals',
    'Western Conf Finals',
    'Finals',
]

# Inclusive range of seasons to keep.
# NOTE(review): the original note here just said "merger" -- presumably the
# lower bound is the first post-ABA/NBA-merger playoffs; confirm.
year_cutoff1 = 1977
year_cutoff2 = 2022

# `data-stat` attribute values read from each player row, in output-column
# order (Player, GP, PTS, REB, AST, STL, BLK).
data_tags = [
    'player',
    'g',
    'pts',
    'trb',
    'ast',
    'stl',
    'blk',
]

In [3]:
# Download the series index page and collect every <tr> of its first
# 'table_container' div; the next cell scans these rows for series links.
# A timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() makes an HTTP error (e.g. a rate-limit response) fail
# here instead of being parsed as if it were the index page.
r = requests.get(home_url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')
soup_tbl = soup.find_all('div', {'class': 'table_container'})
rows = soup_tbl[0].find_all('tr')

In [4]:
base_url = 'https://www.basketball-reference.com'
url_list = []     # absolute URL of every series page to scrape
series_list = []  # human-readable series title, index-aligned with url_list

# Walk every link in every index row and keep the ones whose link text is a
# known series name and whose season falls inside the cutoff range.
# The slices assume hrefs shaped like '/playoffs/<year>-<slug>.html':
# [10:14] is the 4-digit year, [10:-5] is '<year>-<slug>' -- TODO confirm
# against the live page if the site layout changes.
for table_row in rows:
    for anchor in table_row.find_all('a', href=True):
        if anchor.text not in series_names:
            continue
        href = anchor['href']
        season = int(href[10:14])
        if not (year_cutoff1 <= season <= year_cutoff2):
            continue
        url_list.append(base_url + href)
        slug_words = href[10:-5].replace('-', ' ')
        series_list.append(string.capwords(slug_words))

In [5]:
rate_limiter = RateLimiter(max_calls=15, period=60)

REQUEST_TIMEOUT = 30  # seconds; avoids hanging forever on a stalled request
STAT_COLUMNS = ['Player', 'GP', 'PTS', 'REB', 'AST', 'STL', 'BLK']
PER_GAME_COLUMNS = ['PTS', 'REB', 'AST', 'STL', 'BLK']
STAT_TABLE_ATTRS = {'class': 'sortable stats_table'}


def _find_stat_tables(page_soup):
    """Return the page's stats tables that contain a 'pts' cell.

    Tables are collected from the live markup and from HTML comments (some
    tables are only present inside comments), then filtered down to those
    that have at least one <td data-stat="pts">.
    """
    tables = list(page_soup.find_all('table', STAT_TABLE_ATTRS))
    for comment in page_soup.find_all(string=lambda text: isinstance(text, Comment)):
        # Membership test instead of `find(...) > 0`, which skipped a comment
        # whose text begins with the table tag (index 0).
        if '<table ' in comment:
            comment_soup = BeautifulSoup(comment, 'html.parser')
            tables.extend(comment_soup.find_all('table', STAT_TABLE_ATTRS))
    return [tbl for tbl in tables if tbl.find('td', {'data-stat': 'pts'}) is not None]


def _parse_player_row(row):
    """Return one value per entry of `data_tags` for a table row, or None.

    Rows without a player cell are skipped as a whole (None); previously
    only the player value was skipped, which produced a short row whose
    values were shifted one column to the left. Missing or empty cells are
    recorded as the string '0.0'.
    """
    if row.find('td', {'data-stat': 'player'}) is None:
        return None
    values = []
    for tag in data_tags:
        cell = row.find('td', {'data-stat': tag})
        text = cell.text if cell is not None else ''
        values.append(text if text != '' else '0.0')
    return values


def _build_series_frame(player_rows):
    """Turn raw per-player series totals into per-game stats with ranks.

    Adds a `<stat>_rank` column per stat (1 = best, ties share the lowest
    rank) and `best4_rank_sum`, the sum of each player's four best ranks
    (all five ranks minus the worst one).
    """
    df = pd.DataFrame(player_rows, columns=STAT_COLUMNS)
    # Go through float first so the '0.0' placeholder can be cast.
    df['GP'] = df['GP'].astype(float).astype(int)
    # Per-game values are undefined without games played; dropping these
    # rows also avoids inf/NaN breaking the integer rank cast below.
    df = df.loc[df['GP'] > 0].reset_index(drop=True)
    rank_cols = []
    for col in PER_GAME_COLUMNS:
        df[col] = df[col].astype(float) / df['GP']
        df[f'{col}_rank'] = df[col].rank(method='min', ascending=False).astype(int)
        rank_cols.append(f'{col}_rank')
    ranks = df[rank_cols]
    df['best4_rank_sum'] = ranks.sum(axis=1) - ranks.max(axis=1)
    return df


# Per-game stats are computed from the series totals on each page.
# Known limitation: a stat missing from a page is stored as 0.0 for every
# player, so everyone ties at rank 1 in that category; later cells exclude
# the affected series by hand.
dfs = []  # one frame per entry of url_list, in the same order
for counter, url in enumerate(url_list, start=1):
    with rate_limiter:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors (e.g. rate limiting) instead of silently
    # producing an empty frame for this series.
    response.raise_for_status()
    page_soup = BeautifulSoup(response.content, 'html.parser')

    series_stats = []
    for tbl in _find_stat_tables(page_soup):
        for row in tbl.find_all('tbody')[0].find_all('tr'):
            player_stats = _parse_player_row(row)
            if player_stats is not None:
                series_stats.append(player_stats)

    dfs.append(_build_series_frame(series_stats))
    if counter % 50 == 0:
        print(f'Finished query {counter}')


Finished query 50
Finished query 100
Finished query 150
Finished query 200
Finished query 250
Finished query 300
Finished query 350
Finished query 400
Finished query 450
Finished query 500
Finished query 550
Finished query 600
Finished query 650


In [6]:
# find dfs with a player leading 4+ cats
filter_dfs = []
filter_series = []
for i in range(len(dfs)):
    df = dfs[i]
    if len(df.loc[df['best4_rank_sum'] <= 4]) > 0:
        filter_dfs.append(df)
        filter_series.append(series_list[i])

print(len(url_list))
print(len(dfs))
print(len(filter_dfs))


662
662
22


In [26]:
# Take the best-ranked player of each qualifying series and count how often
# each player appears.
# ignore last 2 because steal/block info missing
top_players = [
    series_df.sort_values(by='best4_rank_sum').head(1)
    for series_df in filter_dfs[:-2]
]
top_df = pd.concat(top_players, ignore_index=True)

# reset_index(name=...) names the count column explicitly. The previous
# rename(columns={0: 'Count'}) only worked while value_counts() produced an
# unnamed column and silently did nothing once that column is called
# 'count'; its result was also never stored back into top_counts.
top_counts = top_df[['Player']].value_counts().reset_index(name='Count')
top_counts

Unnamed: 0,Player,Count
0,LeBron James,7
1,Hakeem Olajuwon,2
2,Nikola Jokić,2
3,Charles Barkley,1
4,Draymond Green,1
5,Kevin Garnett,1
6,Larry Bird,1
7,Luka Dončić,1
8,Michael Jordan,1
9,Paul George,1


In [25]:
# Show the five best-ranked players of each qualifying series.
# ignore last 2 because steal/block info missing
for pos, series_df in enumerate(filter_dfs[:-2]):
    print('')
    print(filter_series[pos])
    ranked = series_df.sort_values(by='best4_rank_sum')
    top_five = ranked.reset_index(drop=True).round(1).head(5)
    display(top_five)


2022 Nba Western Conference Semifinals Mavericks Vs Suns


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Luka Dončić,7,32.6,9.9,7.0,2.1,0.1,1,1,1,1,14,4
1,Devin Booker,7,23.4,5.3,4.6,0.6,0.6,2,4,3,11,3,12
2,Mikal Bridges,7,9.9,4.9,3.0,1.3,0.6,10,5,4,3,3,15
3,Jalen Brunson,7,18.7,4.4,2.9,1.1,0.0,3,7,5,4,20,19
4,Chris Paul,7,13.4,4.0,5.7,1.1,0.1,5,10,2,4,14,21



2020 Nba Western Conference Semifinals Nuggets Vs Clippers


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Nikola Jokić,7,24.4,13.4,6.6,0.7,1.4,1,1,1,10,1,4
1,Kawhi Leonard,7,24.3,8.6,5.9,2.3,1.1,2,2,3,1,2,7
2,Paul George,7,21.7,5.4,3.6,2.0,0.9,4,5,5,2,5,16
3,Jamal Murray,7,22.6,4.4,6.4,1.3,0.1,3,8,2,4,14,17
4,Gary Harris,7,10.7,2.1,2.6,1.9,0.3,7,16,6,3,11,27



2019 Nba Western Conference First Round Spurs Vs Nuggets


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Nikola Jokić,7,23.1,12.1,9.1,1.3,0.7,1,1,1,1,2,4
1,LaMarcus Aldridge,7,20.0,9.6,2.7,0.7,1.0,3,2,7,5,1,11
2,DeMar DeRozan,7,22.0,6.7,4.6,1.1,0.1,2,5,2,3,14,12
3,Derrick White,7,15.1,3.0,3.0,0.7,0.7,5,13,6,5,2,18
4,Jamal Murray,7,19.0,2.7,4.1,1.3,0.1,4,14,3,1,14,22



2019 Nba Western Conference Finals Trail Blazers Vs Warriors


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Draymond Green,4,16.5,11.8,8.8,2.2,2.8,6,1,1,1,1,4
1,Klay Thompson,4,21.5,3.8,3.0,1.8,1.2,4,11,6,2,2,14
2,Stephen Curry,4,36.5,8.2,7.2,0.5,0.0,1,2,3,12,17,18
3,Damian Lillard,4,22.2,4.8,8.5,0.8,0.0,2,7,2,9,17,20
4,CJ McCollum,4,22.0,2.8,4.5,1.0,0.5,3,15,4,6,7,20



2018 Nba Eastern Conference First Round Pacers Vs Cavaliers


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,LeBron James,7,34.4,10.0,7.7,1.4,1.0,1,1,1,3,1,4
1,Victor Oladipo,7,22.7,8.3,6.0,2.4,0.4,2,3,2,1,6,8
2,Thaddeus Young,7,11.3,7.7,1.4,1.7,0.9,7,4,9,2,2,15
3,Kevin Love,7,11.4,9.3,1.0,0.6,0.3,6,2,11,10,7,25
4,Bojan Bogdanović,7,12.4,3.4,1.9,0.9,0.0,3,9,7,7,18,26



2017 Nba Eastern Conference First Round Pacers Vs Cavaliers


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,LeBron James,4,32.8,9.8,9.0,3.0,2.0,1,2,1,1,1,4
1,Paul George,4,28.0,8.8,7.2,1.8,0.5,2,5,2,3,7,12
2,Kyrie Irving,4,25.2,2.2,3.0,1.0,1.0,3,10,4,5,4,16
3,Jeff Teague,4,17.0,3.2,6.2,1.0,0.8,4,9,3,5,5,17
4,Myles Turner,4,10.8,6.8,0.8,1.8,1.2,8,6,13,3,2,19



2017 Nba Western Conference First Round Thunder Vs Rockets


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Russell Westbrook,5,37.4,11.6,10.8,2.4,0.4,1,1,1,1,10,4
1,James Harden,5,33.2,6.4,7.0,1.6,0.6,2,5,2,4,6,13
2,Andre Roberson,5,11.6,6.2,1.8,2.4,3.4,6,6,6,1,1,14
3,Patrick Beverley,5,11.6,6.2,3.0,1.8,0.0,6,6,3,3,16,18
4,Steven Adams,5,8.0,6.8,1.4,1.2,1.8,11,2,10,6,3,21



2016 Nba Finals Cavaliers Vs Warriors


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,LeBron James,7,29.7,11.3,8.9,2.6,2.3,1,1,1,1,1,4
1,Draymond Green,6,16.5,10.3,6.3,1.7,1.0,5,2,2,3,3,10
2,Kyrie Irving,7,27.1,3.9,3.9,2.1,0.7,2,9,4,2,5,13
3,Stephen Curry,7,22.6,4.9,3.7,0.9,0.7,3,7,5,7,5,20
4,Andre Iguodala,7,9.1,6.3,4.1,0.9,0.7,9,5,3,7,5,20



2015 Nba Eastern Conference Finals Cavaliers Vs Hawks


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,LeBron James,4,30.2,11.0,9.2,1.5,0.5,1,1,1,1,5,4
1,Jeff Teague,4,21.5,3.2,4.5,1.2,1.0,2,13,3,2,3,10
2,J.R. Smith,4,18.0,7.5,2.0,0.8,0.8,3,4,8,6,4,17
3,Paul Millsap,4,13.8,7.8,2.5,0.8,0.2,4,3,4,6,11,17
4,Kyrie Irving,2,13.0,3.5,5.5,0.5,0.5,5,12,2,10,5,22



2013 Nba Eastern Conference First Round Hawks Vs Pacers


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Paul George,6,18.7,9.5,5.0,1.8,0.8,1,1,1,1,2,4
1,Al Horford,6,16.7,8.8,3.0,1.0,0.8,3,2,7,7,2,14
2,Josh Smith,6,17.0,7.5,3.5,1.8,0.5,2,5,6,1,8,14
3,Roy Hibbert,6,14.7,8.8,1.5,0.2,1.8,6,2,10,14,1,19
4,George Hill,6,15.2,3.7,4.3,1.5,0.2,5,8,3,4,14,20



2009 Nba Eastern Conference First Round Pistons Vs Cavaliers


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,LeBron James,4,32.0,11.2,7.5,1.5,0.8,1,1,1,1,4,4
1,Zydrunas Ilgauskas,4,11.2,6.2,2.0,0.2,1.2,8,3,7,11,1,19
2,Antonio McDyess,4,13.0,8.5,0.5,0.5,0.8,5,2,10,8,4,19
3,Delonte West,4,11.5,4.0,4.0,1.2,0.5,7,8,5,2,6,20
4,Richard Hamilton,4,13.2,2.8,5.0,1.2,0.2,4,11,4,2,10,20



2008 Nba Eastern Conference Semifinals Cavaliers Vs Celtics


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,LeBron James,7,26.7,6.4,7.6,2.1,1.3,1,3,1,1,1,4
1,Kevin Garnett,7,19.6,10.9,3.1,0.9,1.0,2,1,5,5,4,12
2,Paul Pierce,7,19.4,5.0,3.6,1.1,0.1,3,5,3,3,15,14
3,Zydrunas Ilgauskas,7,11.9,7.7,1.7,0.3,1.3,4,2,8,16,1,15
4,Delonte West,7,11.4,3.7,3.4,1.1,0.6,5,10,4,3,6,18



2007 Nba Eastern Conference Finals Cavaliers Vs Pistons


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,LeBron James,6,25.7,9.2,8.5,2.7,0.5,1,1,1,1,6,4
1,Rasheed Wallace,6,14.0,7.2,2.0,1.7,2.7,4,3,6,2,1,10
2,Richard Hamilton,6,19.7,3.8,3.7,1.3,0.2,2,9,3,3,11,17
3,Anderson Varejão,6,7.5,6.3,0.3,1.3,0.7,11,4,18,3,5,23
4,Chauncey Billups,6,15.3,3.8,3.5,0.8,0.2,3,9,4,8,11,24



2004 Nba Western Conference First Round Nuggets Vs Timberwolves


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Kevin Garnett,5,25.8,14.8,7.0,1.0,2.0,1,1,1,5,1,4
1,Latrell Sprewell,5,19.8,3.6,3.2,1.8,1.2,3,8,4,1,3,11
2,Andre Miller,5,15.4,4.6,3.2,1.6,0.0,5,6,4,2,14,17
3,Carmelo Anthony,4,15.0,8.2,2.8,1.2,0.0,6,3,7,4,14,20
4,Marcus Camby,5,12.6,11.4,2.4,0.8,1.4,8,2,8,9,2,20



1996 Nba Eastern Conference First Round Heat Vs Bulls


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Scottie Pippen,3,19.7,11.3,7.0,3.0,1.7,2,1,1,1,1,4
1,Michael Jordan,3,30.0,3.7,2.7,1.7,0.3,1,9,6,3,4,14
2,Toni Kukoč,3,14.3,6.3,3.7,0.3,0.3,5,3,3,13,4,15
3,Alonzo Mourning,3,18.0,6.0,1.3,0.7,1.0,3,4,11,10,2,19
4,Ron Harper,3,10.0,3.0,3.7,2.3,0.0,6,11,3,2,9,20



1995 Nba Western Conference First Round Trail Blazers Vs Suns


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Charles Barkley,3,33.7,13.7,3.3,1.7,1.7,1,1,3,1,1,4
1,Rod Strickland,3,23.3,4.0,12.3,1.0,0.7,2,8,1,3,2,8
2,Harvey Grant,3,14.3,5.3,2.0,1.0,0.7,6,5,5,3,2,15
3,Clifford Robinson,3,15.7,6.3,2.7,0.7,0.3,4,3,4,8,5,16
4,Kevin Johnson,3,17.7,3.3,9.0,1.0,0.0,3,9,2,3,11,17



1994 Nba Western Conference First Round Trail Blazers Vs Rockets


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Hakeem Olajuwon,4,34.0,11.0,4.8,2.2,3.8,1,1,4,1,1,4
1,Clyde Drexler,4,21.0,10.2,5.5,2.0,0.5,3,2,2,2,5,9
2,Rod Strickland,4,23.5,4.0,9.8,1.0,0.5,2,8,1,5,5,13
3,Vernon Maxwell,4,16.8,5.0,5.0,1.5,0.0,4,6,3,3,15,16
4,Robert Horry,4,15.0,4.8,4.5,1.0,1.5,6,7,6,5,2,19



1991 Nba Eastern Conference Semifinals 76ers Vs Bulls


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Michael Jordan,5,33.4,8.0,7.8,1.8,1.4,1,4,1,1,1,4
1,Scottie Pippen,5,23.4,9.4,6.0,1.6,0.4,3,2,2,2,7,9
2,Charles Barkley,5,25.6,10.2,5.4,1.4,0.2,2,1,3,3,10,9
3,Hersey Hawkins,5,19.8,5.8,3.0,1.4,1.4,4,6,5,3,1,13
4,Armen Gilliam,5,16.2,6.2,1.4,0.6,1.0,5,5,12,10,3,23



1988 Nba Western Conference First Round Rockets Vs Mavericks


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Hakeem Olajuwon,4,37.5,16.8,1.8,2.2,2.8,1,1,10,1,1,4
1,Roy Tarpley,4,17.5,10.8,1.5,1.5,1.0,4,2,12,4,4,14
2,Sam Perkins,4,14.5,7.5,1.8,1.5,1.2,6,4,10,4,2,16
3,Mark Aguirre,4,20.8,6.0,3.0,1.2,0.2,2,6,5,6,9,19
4,Sleepy Floyd,4,18.8,1.8,8.5,2.0,0.0,3,13,1,3,12,19



1984 Nba Eastern Conference Finals Bucks Vs Celtics


Unnamed: 0,Player,GP,PTS,REB,AST,STL,BLK,PTS_rank,REB_rank,AST_rank,STL_rank,BLK_rank,best4_rank_sum
0,Larry Bird,5,27.4,10.0,6.0,2.4,0.6,1,1,1,1,7,4
1,Sidney Moncrief,5,17.2,6.2,4.0,2.4,0.8,3,5,2,1,5,11
2,Bob Lanier,5,10.0,7.4,4.0,1.4,0.8,12,3,2,3,5,13
3,Marques Johnson,5,20.2,3.0,3.8,1.2,0.4,2,13,4,5,9,20
4,Dennis Johnson,5,17.2,5.6,3.6,0.8,0.2,3,7,5,7,12,22
