## Importing Data from NBA_API

For this analysis, I will be using data sourced from NBA.com through NBA_API, which is a free and open-source API available through Python. The API code and methodology is available here: https://github.com/swar/nba_api

In [240]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from warnings import simplefilter
import warnings
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
simplefilter(action="ignore", category=pd.errors.SettingWithCopyWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

# Importing all NBA games
from nba_api.stats.endpoints import leaguegamefinder
gamefinder = leaguegamefinder.LeagueGameFinder(league_id_nullable="00")
league_games = gamefinder.get_data_frames()[0]

league_games[['SEASON_ID', 'GAME_ID', 'TEAM_ID']] = league_games[['SEASON_ID', 'GAME_ID', 'TEAM_ID']].astype(str)

display(league_games['SEASON_ID'].unique())
league_games.to_csv('/Users/kaileighstopa/Desktop/Python Practice/Celtics Project/league_games.csv')

# seasons = ['22022', '22021', '22020', '22019', '22018', '22017', '22016', '22015', '22014']

# from nba_api.stats.endpoints import boxscoretraditionalv2
# import time

# league_box_scores = pd.DataFrame()
# for season in seasons:
#     season  = league_games[league_games['SEASON_ID'] == season]
#     display(season)
#     season_game_ids = season['GAME_ID'].unique()
#     display(len(season_game_ids))
#     for game_id in season_game_ids:
#         league_box_score = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id, timeout=200000)
#         league_box_score = league_box_score.get_data_frames()[0]
#         league_box_scores = pd.concat([league_box_scores, league_box_score], axis=0)
#         time.sleep(1)

# display(league_box_scores.head())

# league_box_scores.to_csv('/Users/kaileighstopa/Desktop/Python Practice/Celtics Project/league_box_scores.csv')

league_games = pd.read_csv('/Users/kaileighstopa/Desktop/Python Practice/Celtics Project/league_games.csv')
league_box_scores = pd.read_csv('/Users/kaileighstopa/Desktop/Python Practice/Celtics Project/league_box_scores.csv')

array(['22022', '32022', '12022', '42021', '52021', '22021', '32021',
       '12021', '42020', '52020', '22020', '32020', '12020', '42019',
       '52019', '22019', '12019', '32019', '42018', '22018', '32018',
       '12018', '42017', '22017', '32017', '12017', '42016', '22016',
       '32016', '12016', '42015', '22015', '32015', '12015', '42014',
       '22014', '32014', '12014', '42013', '22013', '32013', '12013',
       '42012', '22012', '32012', '12012', '42011', '22011'], dtype=object)

In [241]:
# Filter dataset for regular season games starting in 2014 season
league_games[['SEASON_ID', 'GAME_ID', 'TEAM_ID']] = league_games[['SEASON_ID', 'GAME_ID', 'TEAM_ID']].astype(str)
seasons = ['22022', '22021', '22020', '22019', '22018', '22017', '22016', '22015', '22014']
mask = league_games['SEASON_ID'].isin(seasons)
league_games = league_games[mask]

# Inspecting dataset and checking missing values
display(league_games.head())
display(league_games.isna().sum())

# Validating game count by team per season
game_count = league_games.groupby(['SEASON_ID', 'TEAM_NAME']).size()
display(game_count)

Unnamed: 0.1,Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,0,22022,1610612743,DEN,Denver Nuggets,22200989,2023-03-08,DEN vs. CHI,L,241,...,0.467,11,33,44,27,5,2,11,16,-21.0
1,1,22022,1610612756,PHX,Phoenix Suns,22200990,2023-03-08,PHX vs. OKC,W,240,...,0.875,14,40,54,31,10,7,14,17,31.0
2,2,22022,1610612737,ATL,Atlanta Hawks,22200985,2023-03-08,ATL @ WAS,W,241,...,0.769,8,28,36,26,11,5,11,17,2.0
3,3,22022,1610612748,MIA,Miami Heat,22200987,2023-03-08,MIA vs. CLE,L,241,...,0.759,10,27,37,22,7,4,22,19,-4.0
4,4,22022,1610612741,CHI,Chicago Bulls,22200989,2023-03-08,CHI @ DEN,W,240,...,0.944,9,41,50,26,9,2,8,16,21.0


Unnamed: 0           0
SEASON_ID            0
TEAM_ID              0
TEAM_ABBREVIATION    0
TEAM_NAME            0
GAME_ID              0
GAME_DATE            0
MATCHUP              0
WL                   0
MIN                  0
PTS                  0
FGM                  0
FGA                  0
FG_PCT               0
FG3M                 0
FG3A                 0
FG3_PCT              0
FTM                  0
FTA                  0
FT_PCT               0
OREB                 0
DREB                 0
REB                  0
AST                  0
STL                  0
BLK                  0
TOV                  0
PF                   0
PLUS_MINUS           0
dtype: int64

SEASON_ID  TEAM_NAME         
22014      Atlanta Hawks         82
           Boston Celtics        82
           Brooklyn Nets         82
           Charlotte Hornets     82
           Chicago Bulls         82
                                 ..
22022      Sacramento Kings      64
           San Antonio Spurs     65
           Toronto Raptors       67
           Utah Jazz             66
           Washington Wizards    66
Length: 270, dtype: int64