In [1]:
# To assemble a dataset of all NBA seasons since 1980, I used vishaalagartha's basketball reference scraper, 
# which can be found on their github here: https://github.com/vishaalagartha/basketball_reference_scraper 
from basketball_reference_scraper.teams import get_roster, get_roster_stats

# Lists of team names for scraping, together with when they joined the NBA. I chose 1980 as the starting point 
# because that's when the 3 point line was introduced
old_teams = (['ATL', 'BOS', 'BRK', 'CHI', 'CLE', 'DEN', 'DET', 'GSW', 'PHI', 'HOU', 
 'IND', 'LAC', 'LAL', 'MIL', 'NYK', 'OKC', 'PHO', 'POR', 
 'SAC', 'SAS', 'UTA', 'WAS'], 1980) 
teams_81 = (['DAL'], 1981)
teams_89 = (['MIA'], 1989)
teams_90 = (['MIN', 'ORL'], 1990)
teams_96 = (['MEM', 'TOR'], 1996)
teams_03 = (['NOP'], 2003) 

# Charlotte was not in the league for 2 years 
char_89 = (['CHO'], 1989)
char_05 = (['CHO'], 2005)

In [2]:
import pandas as pd

# Load team data -- takes a list of team names and a start year, returns all relevant seasons
def load_team_data(team_names, start_year, end_year=2020, verbose=True): 
    # empty array for storing season data
    seasons = []
    for team in team_names:
        for year in range(start_year, end_year + 1):
            # get the data for this team and season, append it to the array
            season_data = get_roster_stats(team, year)
            seasons.append(season_data)
        if verbose:
            print("Done with " + team + "\n")
    return pd.concat(seasons)

In [3]:
# Empty array for storing team data
team_data = []

# Load each team's data
for teams in [old_teams, teams_81, teams_89, teams_90, teams_96, teams_03, char_05]: 
    team_data.append(load_team_data(teams[0], teams[1]))

# load Charlotte's first stint with an end date 
team_data.append(load_team_data(char_89[0], char_89[1], end_year=2002))

# concatenate everything into one big dataframe
data = pd.concat(team_data)


Done with ATL

Done with BOS

Done with BRK

Done with CHI

Done with CLE

Done with DEN

Done with DET

Done with GSW

Done with PHI

Done with HOU

Done with IND

Done with LAC

Done with LAL

Done with MIL

Done with NYK

Done with OKC

Done with PHO

Done with POR

Done with SAC

Done with SAS

Done with UTA

Done with WAS

Done with DAL

Done with MIA

Done with MIN

Done with ORL

Done with MEM

Done with TOR

Done with NOP

Done with CHO

Done with CHO



In [4]:
data.head, data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19109 entries, 0 to 14
Data columns (total 30 columns):
PLAYER    19109 non-null object
POS       19109 non-null object
AGE       19109 non-null object
TEAM      19109 non-null object
G         19109 non-null object
GS        18476 non-null object
MP        19109 non-null object
FG        19109 non-null object
FGA       19109 non-null object
FG%       19009 non-null object
3P        19109 non-null object
3PA       19109 non-null object
3P%       15740 non-null object
2P        19109 non-null object
2PA       19109 non-null object
2P%       18954 non-null object
eFG%      19009 non-null object
FT        19109 non-null object
FTA       19109 non-null object
FT%       18265 non-null object
ORB       19109 non-null object
DRB       19109 non-null object
TRB       19109 non-null object
AST       19109 non-null object
STL       19109 non-null object
BLK       19109 non-null object
TOV       19109 non-null object
PF        19109 non-null objec

(<bound method NDFrame.head of                PLAYER POS AGE TEAM   G   GS    MP   FG   FGA   FG%  ...  ORB  \
 0          John Brown  SF  28  ATL  28  NaN  12.9  1.3   3.5  .378  ...  0.8   
 1       Charlie Criss  PG  31  ATL  81  NaN  22.1  3.1   7.1  .431  ...  0.3   
 2           John Drew  SF  25  ATL  80  NaN  28.8  6.7  14.8  .453  ...  2.5   
 3        Terry Furlow  SG  25  ATL  21  NaN  19.2  3.1   7.7  .410  ...  1.1   
 4         Jack Givens  SF  23  ATL  82  NaN  15.3  2.2   5.8  .385  ...  1.4   
 ..                ...  ..  ..  ...  ..  ...   ...  ...   ...   ...  ...  ...   
 10    JÃ©rÃ´me MoÃ¯so   C  23  CHH  15    0   5.1  0.5   1.3  .400  ...  0.3   
 11         Lee Nailon  SF  26  CHH  79   41  24.2  4.7   9.7  .483  ...  1.3   
 12  Eldridge Recasner  SG  34  CHH   1    0   2.0  0.0   0.0   NaN  ...  0.0   
 13     Robert Traylor  PF  24  CHH  61    1  11.1  1.4   3.3  .426  ...  1.1   
 14       David Wesley  SG  31  CHH  67   63  37.1  5.4  13.6  .400  ...  0.7  

In [5]:
# Save as a CSV 
data.to_csv('nba_data.csv')