In [34]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_coaches_year(url):
    """Return every HTML table found at ``url`` as a list of DataFrames.

    Thin wrapper around ``pandas.read_html``; its result is returned unchanged.
    """
    return pd.read_html(url)

In [107]:
def scrape_all_seasons(first_year, last_year):
    """Collect the first table of each season's coaches page into one DataFrame.

    Parameters
    ----------
    first_year, last_year : int
        Inclusive range of season years substituted into the
        basketball-reference ``NBA_{year}_coaches.html`` URL.

    Returns
    -------
    pandas.DataFrame
        The per-season tables stacked row-wise (index renumbered from 0) with
        an added ``Year`` column holding the season each row came from.
        An empty DataFrame is returned when ``first_year > last_year``.
    """
    season_frames = []
    for year in range(first_year, last_year + 1):
        url = f'https://www.basketball-reference.com/leagues/NBA_{year}_coaches.html'
        # Only the first table parsed from the page is used.
        year_data = scrape_coaches_year(url)[0]
        year_data['Year'] = year
        season_frames.append(year_data)

    # pd.concat raises on an empty list, so return an empty frame for an empty range.
    if not season_frames:
        return pd.DataFrame()

    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(season_frames, axis=0, ignore_index=True)


In [108]:
# Pull every season from 2000 through 2023 (both ends inclusive).
since_2000 = scrape_all_seasons(first_year=2000, last_year=2023)

In [109]:
# The scraped table has multi-level column labels; join the levels of each
# label into one space-separated string.
# Guarded so that re-running the cell on already-flattened (plain string)
# labels is a no-op: joining a plain string would insert a space between
# every character of the name.
if since_2000.columns.nlevels > 1:
    since_2000.columns = [' '.join(col).strip() for col in since_2000.columns]

In [110]:
# Inspect the flattened column labels.
since_2000.columns

Index(['Unnamed: 0_level_0 Unnamed: 0_level_1 Coach',
       'Unnamed: 1_level_0 Unnamed: 1_level_1 Tm',
       'Unnamed: 2_level_0 Unnamed: 2_level_1 Unnamed: 2_level_2',
       'Seasons w/ Franch #', 'Seasons Overall #',
       'Unnamed: 5_level_0 Unnamed: 5_level_1 Unnamed: 5_level_2',
       'Regular Season Current Season G', 'Regular Season Current Season W',
       'Regular Season Current Season L', 'Regular Season w/ Franchise G',
       'Regular Season w/ Franchise W', 'Regular Season w/ Franchise L',
       'Regular Season Career G', 'Regular Season Career W',
       'Regular Season Career L', 'Regular Season Career W%',
       'Unnamed: 16_level_0 Unnamed: 16_level_1 Unnamed: 16_level_2',
       'Playoffs Current Season G', 'Playoffs Current Season W',
       'Playoffs Current Season L', 'Playoffs w/ Franchise G',
       'Playoffs w/ Franchise W', 'Playoffs w/ Franchise L',
       'Playoffs Career G', 'Playoffs Career W', 'Playoffs Career L', 'Year'],
      dtype='object')

In [111]:
# Keep a copy of the current column labels before they are rewritten.
# NOTE(review): not referenced again in the cells visible here.
store_og_columns = since_2000.columns.copy()

In [112]:
# Remove the auto-generated 'Unnamed: <n>_level_<m>' placeholders from the
# flattened header labels and trim the leftover whitespace. Labels that consist
# only of placeholders collapse to the empty string.
# One anchored pattern is used instead of deleting individual digit characters,
# so digits that belong to a real column name are left intact.
since_2000.columns = (
    since_2000.columns
    .str.replace(r'Unnamed: \d+_level_\d+', '', regex=True)
    .str.strip()
)

In [113]:
# Discard every column whose label is the empty string.
since_2000 = since_2000.drop(columns=[''])

In [114]:
# Show all columns when displaying DataFrames (the option stays set for later cells).
pd.set_option('display.max_columns', None)
since_2000

Unnamed: 0,Coach,Tm,Seasons w/ Franch #,Seasons Overall #,Regular Season Current Season G,Regular Season Current Season W,Regular Season Current Season L,Regular Season w/ Franchise G,Regular Season w/ Franchise W,Regular Season w/ Franchise L,Regular Season Career G,Regular Season Career W,Regular Season Career L,Regular Season Career W%,Playoffs Current Season G,Playoffs Current Season W,Playoffs Current Season L,Playoffs w/ Franchise G,Playoffs w/ Franchise W,Playoffs w/ Franchise L,Playoffs Career G,Playoffs Career W,Playoffs Career L,Year
0,Lenny Wilkens,ATL,7,27,82,28,54,542,310,232,2160,1179,981,0.546,,,,47.0,17.0,30.0,157.0,72.0,85.0,2000
1,Rick Pitino,BOS,3,5,82,35,47,214,90,124,378,180,198,0.476,,,,,,,13.0,6.0,7.0,2000
2,Paul Silas,CHH,2,5,82,49,33,117,71,46,363,149,214,0.410,4.0,1.0,3.0,4.0,1.0,3.0,4.0,1.0,3.0,2000
3,Tim Floyd,CHI,2,2,82,17,65,132,30,102,132,30,102,0.227,,,,,,,,,,2000
4,Randy Wittman,CLE,1,1,82,32,50,82,32,50,82,32,50,0.390,,,,,,,,,,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
805,Mike Brown,SAC,1,9,82,48,34,82,48,34,645,395,250,0.612,7.0,3.0,4.0,7.0,3.0,4.0,90.0,50.0,40.0,2023
806,Gregg Popovich,SAS,27,27,82,22,60,2127,1366,761,2127,1366,761,0.642,,,,284.0,170.0,114.0,284.0,170.0,114.0,2023
807,Nick Nurse,TOR,5,5,82,41,41,390,227,163,390,227,163,0.582,,,,41.0,25.0,16.0,41.0,25.0,16.0,2023
808,Will Hardy,UTA,1,1,82,37,45,82,37,45,82,37,45,0.451,,,,,,,,,,2023


In [118]:
# Total every numeric column per (team, season). numeric_only=True is passed
# explicitly so non-numeric columns such as 'Coach' are excluded without
# relying on the deprecated default behaviour.
previous_year_team_win_pct = since_2000.groupby(['Tm', 'Year']).sum(numeric_only=True)
previous_year_team_win_pct


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



Unnamed: 0_level_0,Unnamed: 1_level_0,Seasons w/ Franch #,Seasons Overall #,Regular Season Current Season G,Regular Season Current Season W,Regular Season Current Season L,Regular Season w/ Franchise G,Regular Season w/ Franchise W,Regular Season w/ Franchise L,Regular Season Career G,Regular Season Career W,Regular Season Career L,Regular Season Career W%,Playoffs Current Season G,Playoffs Current Season W,Playoffs Current Season L,Playoffs w/ Franchise G,Playoffs w/ Franchise W,Playoffs w/ Franchise L,Playoffs Career G,Playoffs Career W,Playoffs Career L
Tm,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ATL,2000,7,27,82,28,54,542,310,232,2160,1179,981,0.546,0.0,0.0,0.0,47.0,17.0,30.0,157.0,72.0,85.0
ATL,2001,1,1,82,25,57,82,25,57,82,25,57,0.305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ATL,2002,2,2,82,33,49,164,58,106,164,58,106,0.354,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ATL,2003,4,4,82,35,47,246,93,153,246,93,153,0.797,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ATL,2004,2,2,82,28,54,137,52,85,137,52,85,0.380,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WAS,2019,3,10,82,32,50,246,124,122,791,462,329,0.584,0.0,0.0,0.0,19.0,9.0,10.0,92.0,48.0,44.0
WAS,2020,4,11,72,25,47,318,149,169,863,487,376,0.564,0.0,0.0,0.0,19.0,9.0,10.0,92.0,48.0,44.0
WAS,2021,5,12,72,34,38,390,183,207,935,521,414,0.557,5.0,1.0,4.0,24.0,10.0,14.0,97.0,49.0,48.0
WAS,2022,1,1,82,35,47,82,35,47,82,35,47,0.427,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [119]:
# Win fraction per (team, season): summed current-season wins over summed games.
previous_year_team_win_pct['Win%'] = (
    previous_year_team_win_pct['Regular Season Current Season W']
    .div(previous_year_team_win_pct['Regular Season Current Season G'])
)

In [120]:
# Move the 'Tm' and 'Year' group keys out of the index and back into ordinary columns.
previous_year_team_win_pct = previous_year_team_win_pct.reset_index()

In [121]:
# Shift each season forward by one so that, when joined on ('Tm', 'Year'),
# a row's Win% comes from the team's preceding season.
# Note: this overwrites 'Year' in place, so running the cell again adds another year.
previous_year_team_win_pct['Year'] = previous_year_team_win_pct['Year'] + 1

In [122]:
# Keep only the join keys and the computed win fraction.
prev_year = previous_year_team_win_pct.loc[:, ['Tm', 'Year', 'Win%']]

In [123]:
# Attach the shifted 'Win%' to each coach row. The inner join keeps only rows
# whose (Tm, Year) pair also exists in prev_year.
since_2000 = since_2000.merge(prev_year, on=['Tm', 'Year'], how='inner')

In [129]:
# Rows where 'Seasons w/ Franch #' is 1 and the coach is credited with exactly
# 82 regular-season games in the current season.
first_season_head_coaches = since_2000.loc[
    since_2000['Seasons w/ Franch #'].eq(1)
    & since_2000['Regular Season Current Season G'].eq(82)
]

In [133]:
# Cap DataFrame displays at 25 rows; longer frames are shown truncated.
pd.set_option('display.max_rows', 25)

In [131]:
# Replace missing values with 0 (e.g. the empty playoff columns seen in the
# earlier output). The result is reassigned rather than filled with
# inplace=True because this frame came from boolean filtering, and an in-place
# fill on it raises SettingWithCopyWarning.
first_season_head_coaches = first_season_head_coaches.fillna(0)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [134]:
# Display the filtered frame after missing values were filled with 0.
first_season_head_coaches

Unnamed: 0,Coach,Tm,Seasons w/ Franch #,Seasons Overall #,Regular Season Current Season G,Regular Season Current Season W,Regular Season Current Season L,Regular Season w/ Franchise G,Regular Season w/ Franchise W,Regular Season w/ Franchise L,Regular Season Career G,Regular Season Career W,Regular Season Career L,Regular Season Career W%,Playoffs Current Season G,Playoffs Current Season W,Playoffs Current Season L,Playoffs w/ Franchise G,Playoffs w/ Franchise W,Playoffs w/ Franchise L,Playoffs Career G,Playoffs Career W,Playoffs Career L,Year,Win%
0,Lon Kruger,ATL,1,1,82,25,57,82,25,57,82,25,57,0.305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001,0.341463
9,Dave Cowens,GSW,1,5,82,17,65,82,17,65,329,153,176,0.465,0.0,0.0,0.0,0.0,0.0,0.0,12.0,4.0,8.0,2001,0.231707
11,Isiah Thomas,IND,1,1,82,41,41,82,41,41,82,41,41,0.500,4.0,1.0,3.0,4.0,1.0,3.0,4.0,1.0,3.0,2001,0.682927
12,Alvin Gentry,LAC,1,5,82,31,51,82,31,51,263,119,144,0.452,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2.0,3.0,2001,0.182927
17,Byron Scott,NJN,1,1,82,26,56,82,26,56,82,26,56,0.317,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2001,0.378049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
730,Wes Unseld,WAS,1,1,82,35,47,82,35,47,82,35,47,0.427,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2022,0.472222
734,Joe Mazzulla,BOS,1,1,82,57,25,82,57,25,82,57,25,0.695,15.0,8.0,7.0,15.0,8.0,7.0,15.0,8.0,7.0,2023,0.621951
747,Darvin Ham,LAL,1,1,82,43,39,82,43,39,82,43,39,0.524,14.0,8.0,6.0,14.0,8.0,6.0,14.0,8.0,6.0,2023,0.402439
759,Mike Brown,SAC,1,9,82,48,34,82,48,34,645,395,250,0.612,7.0,3.0,4.0,7.0,3.0,4.0,90.0,50.0,40.0,2023,0.365854


In [135]:
# Independent copy under a shorter name, so later edits to fshc do not touch
# first_season_head_coaches.
fshc = first_season_head_coaches.copy()

In [None]:
# Completes an unfinished expression that did not parse and misspelled `fshc` as `fsch`.
# NOTE(review): the intended condition is assumed to be "reached the playoffs in
# that first season", i.e. at least one current-season playoff game. Confirm.
playoff_hirings = fshc[fshc['Playoffs Current Season G'] > 0]

In [97]:
import plotly.express as px

# Scatter of current-season regular-season wins against overall seasons coached.
px.scatter(
    first_season_head_coaches,
    x='Seasons Overall #',
    y='Regular Season Current Season W',
)