In [39]:
import pandas as pd
import matplotlib as plt
import os 
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder

## Extract data for given team

Code to extract all game data for a given team.
The user needs to set the city name in the `city` variable.

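The full game history (after rows with missing values are dropped) is saved to `DATA/<city>_rawData.csv`;
the last-season (2018-2019) data is saved to `DATA/<city>_2018-2019_league_Data.csv`.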

In [57]:
# City name handed to teams.find_teams_by_city in a later cell
# (another example value: 'houston').
city = 'oklahoma'

# Output locations: both CSV files are written under dir_name and use the
# city name as their filename prefix.
dir_name ="DATA/"
base_filename = city
filename_suffix =".csv"
raw_out = os.path.join(dir_name, base_filename + "_rawData" + filename_suffix)
season_out = os.path.join(dir_name, base_filename + "_2018-2019_league_Data" + filename_suffix)

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before any later cell tries to write into it.
os.makedirs(dir_name, exist_ok=True)

In [26]:
# Look up the teams whose city matches `city`; the result is a list of
# team-info dicts (id, full_name, abbreviation, ...).
team_info = teams.find_teams_by_city(city)

# Fail here with a readable message instead of letting a later cell hit an
# opaque IndexError on team_info[0] when the city matches no team.
if not team_info:
    raise ValueError("No NBA team found for city {!r}; check the value of `city`.".format(city))
team_info

[{'id': 1610612760,
  'full_name': 'Oklahoma City Thunder',
  'abbreviation': 'OKC',
  'nickname': 'Thunder',
  'city': 'Oklahoma City',
  'state': 'Oklahoma',
  'year_founded': 1967}]

In [27]:
# Use the first team returned by the city search and keep its abbreviation.
first_match = team_info[0]
abb = first_match["abbreviation"]
abb

'OKC'

In [29]:
# Full list of team-info dicts known to nba_api.
nba_teams = teams.get_teams()

# Pick the entry whose abbreviation equals `abb` (the team selected via
# `city`). next() stops at the first hit and lets us raise a descriptive
# error when nothing matches, instead of an IndexError from `[...][0]`.
team_record = next((team for team in nba_teams if team['abbreviation'] == abb), None)
if team_record is None:
    raise LookupError("No team with abbreviation {!r} in the nba_api team list.".format(abb))
team_id = team_record['id']
team_id

1610612760

In [32]:
# Ask the league game finder for the games played by the selected team.
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
# get_data_frames() yields several DataFrames; the game log is the first one.
result_frames = gamefinder.get_data_frames()
games = result_frames[0]
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22019,1610612760,OKC,Oklahoma City Thunder,1521900067,2019-07-12,OKC vs. CRO,W,199,69,...,0.5,1,24,25,18,7,8,13,11,3.4
1,22019,1610612760,OKC,Oklahoma City Thunder,1521900061,2019-07-11,OKC vs. POR,W,199,92,...,0.727,9,31,40,17,7,4,15,20,6.0
2,22019,1610612760,OKC,Oklahoma City Thunder,1521900040,2019-07-09,OKC vs. CRO,W,199,84,...,0.667,6,19,25,20,10,6,11,16,0.6
3,22019,1610612760,OKC,Oklahoma City Thunder,1521900030,2019-07-08,OKC @ PHI,W,211,84,...,0.72,14,31,45,22,14,7,19,26,0.2
4,22019,1610612760,OKC,Oklahoma City Thunder,1521900011,2019-07-06,OKC @ UTA,L,201,66,...,0.625,6,35,41,11,10,8,21,17,-10.4


In [33]:
# Discard every row that has a missing value in any column.
games = games.dropna(axis=0, how="any")

# Number of games that remain, counted on the WL column.
total_matches = games.loc[:, "WL"].count()
total_matches

2109

In [58]:
# Write the cleaned game log to disk; index=False leaves the row index out.
games.to_csv(path_or_buf=raw_out, index=False)

## Extract seasonal data for given city

In [45]:
# Date window for last season (2018-2019); both bounds are exclusive.
start_season = "2018-10-01"
end_season   = "2019-06-15"

# Compare real dates rather than raw strings. String comparison is
# lexicographic, so the previous non-zero-padded bound "2018-10-1" sorted
# after "2018-10-01" ... "2018-10-09" and silently excluded those days.
game_dates = pd.to_datetime(games["GAME_DATE"])
mask = (game_dates < pd.Timestamp(end_season)) & (game_dates > pd.Timestamp(start_season))
tmp = games.loc[mask]

In [46]:
# Display the rows of `games` that fall inside the season date window.
tmp

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
5,42018,1610612760,OKC,Oklahoma City Thunder,0041800165,2019-04-23,OKC @ POR,L,239,115,...,0.600,6,33,39,24,4,8,15,22,-9.0
6,42018,1610612760,OKC,Oklahoma City Thunder,0041800164,2019-04-21,OKC vs. POR,L,241,98,...,0.773,6,38,44,23,7,4,14,22,-13.0
7,42018,1610612760,OKC,Oklahoma City Thunder,0041800163,2019-04-19,OKC vs. POR,W,239,120,...,0.795,8,29,37,27,6,5,15,24,12.0
8,42018,1610612760,OKC,Oklahoma City Thunder,0041800162,2019-04-16,OKC @ POR,L,240,94,...,0.679,13,34,47,21,6,6,16,27,-20.0
9,42018,1610612760,OKC,Oklahoma City Thunder,0041800161,2019-04-14,OKC @ POR,L,240,99,...,0.870,18,31,49,18,10,4,16,26,-5.0
10,22018,1610612760,OKC,Oklahoma City Thunder,0021801226,2019-04-10,OKC @ MIL,W,240,127,...,0.615,9,44,53,40,7,3,12,20,11.0
11,22018,1610612760,OKC,Oklahoma City Thunder,0021801218,2019-04-09,OKC vs. HOU,W,240,112,...,0.800,18,38,56,20,5,2,12,19,1.0
12,22018,1610612760,OKC,Oklahoma City Thunder,0021801197,2019-04-07,OKC @ MIN,W,239,132,...,0.846,14,32,46,27,6,4,13,30,6.0
13,22018,1610612760,OKC,Oklahoma City Thunder,0021801186,2019-04-05,OKC vs. DET,W,240,123,...,0.682,21,38,59,30,10,4,17,24,13.0
14,22018,1610612760,OKC,Oklahoma City Thunder,0021801161,2019-04-02,OKC vs. LAL,W,241,119,...,0.600,17,33,50,33,15,7,12,14,16.0


In [47]:
# Keep only rows whose SEASON_ID is "22018"; rows carrying any other ID
# (e.g. "42018") are filtered out.
# NOTE(review): "22018" looks like the 2018-19 regular-season ID — confirm.
# Cast to str first so the filter still works if SEASON_ID holds integers
# (e.g. after a CSV round trip); comparing ints against a string would
# silently match nothing.
mask = tmp["SEASON_ID"].astype(str) == "22018"
last_season_data = tmp.loc[mask]

In [48]:
# Preview the first rows of the season-filtered data.
last_season_data.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
10,22018,1610612760,OKC,Oklahoma City Thunder,21801226,2019-04-10,OKC @ MIL,W,240,127,...,0.615,9,44,53,40,7,3,12,20,11.0
11,22018,1610612760,OKC,Oklahoma City Thunder,21801218,2019-04-09,OKC vs. HOU,W,240,112,...,0.8,18,38,56,20,5,2,12,19,1.0
12,22018,1610612760,OKC,Oklahoma City Thunder,21801197,2019-04-07,OKC @ MIN,W,239,132,...,0.846,14,32,46,27,6,4,13,30,6.0
13,22018,1610612760,OKC,Oklahoma City Thunder,21801186,2019-04-05,OKC vs. DET,W,240,123,...,0.682,21,38,59,30,10,4,17,24,13.0
14,22018,1610612760,OKC,Oklahoma City Thunder,21801161,2019-04-02,OKC vs. LAL,W,241,119,...,0.6,17,33,50,33,15,7,12,14,16.0


In [49]:
# Count the non-null SEASON_ID entries, i.e. the number of games kept for
# the season; a shortfall against the row count would point to NA values.
season_ids = last_season_data["SEASON_ID"]
total_games = season_ids.count()
total_games

82

In [50]:
# Defensive cleanup: remove any row that still has a missing value, then
# show the non-null count of every column.
last_season_data = last_season_data.dropna(axis=0, how="any")
last_season_data.count()

SEASON_ID            82
TEAM_ID              82
TEAM_ABBREVIATION    82
TEAM_NAME            82
GAME_ID              82
GAME_DATE            82
MATCHUP              82
WL                   82
MIN                  82
PTS                  82
FGM                  82
FGA                  82
FG_PCT               82
FG3M                 82
FG3A                 82
FG3_PCT              82
FTM                  82
FTA                  82
FT_PCT               82
OREB                 82
DREB                 82
REB                  82
AST                  82
STL                  82
BLK                  82
TOV                  82
PF                   82
PLUS_MINUS           82
dtype: int64

In [56]:
# Write the season data to disk; index=False leaves the row index out.
last_season_data.to_csv(path_or_buf=season_out, index=False)

In [54]:
# Win/loss split: count the rows per WL value, then keep the SEASON_ID
# column of that table as the per-outcome game count.
total_WL_groupby = last_season_data.groupby(by="WL").count()

total_WL = total_WL_groupby.loc[:, "SEASON_ID"]
total_WL

WL
L    33
W    49
Name: SEASON_ID, dtype: int64