In [25]:
import pandas as pd
import random
import requests
import time

# Display formatting: show floats with three decimals and raise the
# column/row display limits used when a DataFrame is rendered.
pd.options.display.float_format = lambda value: '%.3f' % value
pd.options.display.max_columns = 500
pd.options.display.max_rows = 5000

# postgres

# from sqlalchemy import create_engine
# import psycopg2

# To be Added, Database Loader
# engine = create_engine('postgresql://scott:tiger@localhost:5432/mydatabase')
# engine = create_engine("postgresql://paul:postgres@localhost:5432/postgres")

# df_all.to_sql("leagueLeaders", engine, schema='public', if_exists='replace', index=False) # chunksize = 1000, 

### Metadata

HTML URL: https://stats.nba.com/game/0021900470/hustle/

API ENDPOINT: https://stats.nba.com/stats/hustlestatsboxscore?GameID=0021900470

In [2]:
# The game to fetch is identified by its GameID. Keeping it in one constant
# makes it easy to point the notebook at a different game.
GAME_ID = "0021900470"
url = f"https://stats.nba.com/stats/hustlestatsboxscore?GameID={GAME_ID}"

In [3]:
# Browser-like request headers sent with the API call.
# NOTE(review): presumably stats.nba.com rejects or stalls requests that lack
# these (User-Agent, Referer, x-nba-stats-*) -- confirm before trimming any.
headers = {
    'Host': 'stats.nba.com',
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9'}

# requests applies no timeout by default, so a stalled server would hang this
# cell forever. raise_for_status() turns an HTTP error response into an
# exception here, instead of a confusing JSON/KeyError further down.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

# Parsed JSON body; the following cells explore it under the name `r`.
r = response.json()

In [4]:
# What kind of Python object did .json() give us?
type(r)

dict

In [5]:
# dir() lists every attribute and method available on the object -- here, a dictionary.
# The names without double underscores (at the end of the list) are the everyday ones.
dir(r)

['__class__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'clear',
 'copy',
 'fromkeys',
 'get',
 'items',
 'keys',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values']

The main ones of interest are:
- `keys`
- `values`

We're going to figure out the hierarchy of "keys" within the data container (the dictionary) so that we can access the data.

Here are a few tutorials, if desired:
- https://realpython.com/python-dicts/
- https://www.learnpython.org/en/Dictionaries
- https://www.youtube.com/watch?v=daefaLgNkw0 (video)


#### Here's a quick tutorial
- They map a `key` to a value, and that "value" can be any number of things. A key is usually a `str` or an `int` (any hashable object works).
- The value can be a `list`, _another_ `dictionary`, a `set`, a `tuple`, a `string`, a `float`, an `integer`, etc.
- They're very handy and they're VERY fast. BUT, you can't index them by position (they only remember insertion order, as of Python 3.7). You MUST look up a dictionary value by its `key`.


In [6]:
# Month number (1-12) -> month name; enumerate(start=1) supplies the integer keys.
months = dict(enumerate([
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
], start=1))

In [8]:
# All of the keys in the dictionary
months.keys()

dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])

In [9]:
# All of the values in the dictionary
months.values()

dict_values(['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'])

In [12]:
# Look up a single value by its key
months[5]

'May'

In [13]:
# City -> team nickname. Every city here is a valid identifier, so the
# keyword form of dict() can spell out the string keys.
MLB_teams = dict(
    Colorado='Rockies',
    Boston='Red Sox',
    Minnesota='Twins',
    Milwaukee='Brewers',
    Seattle='Mariners',
)

In [14]:
# Keys don't have to be integers -- these are strings
MLB_teams.keys()

dict_keys(['Colorado', 'Boston', 'Minnesota', 'Milwaukee', 'Seattle'])

In [16]:
# Look up a value by its string key
MLB_teams['Colorado']

'Rockies'

In [17]:
# Any other key is looked up the same way
MLB_teams['Minnesota']

'Twins'

Sometimes a value is more than just a single string. Here, each value is ANOTHER dictionary.

In [19]:
# City -> nested dictionary with that team's 'nickname' and 'stadium'.
MLB_teams_info = {
    city: {'nickname': nickname, 'stadium': stadium}
    for city, nickname, stadium in [
        ('Colorado', 'Rockies', 'Coors Field'),
        ('Boston', 'Red Sox', 'Fenway Stadium'),
        ('Minnesota', 'Twins', 'Hubert Humphrey Metrodome'),
        ('Milwaukee', 'Brewers', 'Miller Park'),
        ('Seattle', 'Mariners', 'Kingdome'),
    ]
}

In [20]:
# Display the whole nested dictionary
MLB_teams_info

{'Colorado': {'nickname': 'Rockies', 'stadium': 'Coors Field'},
 'Boston': {'nickname': 'Red Sox', 'stadium': 'Fenway Stadium'},
 'Minnesota': {'nickname': 'Twins', 'stadium': 'Hubert Humphrey Metrodome'},
 'Milwaukee': {'nickname': 'Brewers', 'stadium': 'Miller Park'},
 'Seattle': {'nickname': 'Mariners', 'stadium': 'Kingdome'}}

In [21]:
# The value stored under 'Minnesota' is itself a dictionary
MLB_teams_info['Minnesota']

{'nickname': 'Twins', 'stadium': 'Hubert Humphrey Metrodome'}

In [23]:
# Confirm the type of that inner value
type(MLB_teams_info['Minnesota'])

dict

In [22]:
MLB_teams_info['Minnesota']['stadium'] # gives us the stadium VALUE

'Hubert Humphrey Metrodome'

#### Getting back to our `r` object for our scrape.... maybe we can parse it for the values we want?

In [27]:
# Top-level keys of the API response
r.keys()

dict_keys(['resource', 'parameters', 'resultSets'])

In [28]:
r['resultSets'] # the results live under this key -- but what TYPE is this value?

[{'name': 'HustleStatsAvailable',
  'headers': ['GAME_ID', 'HUSTLE_STATUS'],
  'rowSet': [['0021900470', 1]]},
 {'name': 'PlayerStats',
  'headers': ['GAME_ID',
   'TEAM_ID',
   'TEAM_ABBREVIATION',
   'TEAM_CITY',
   'PLAYER_ID',
   'PLAYER_NAME',
   'START_POSITION',
   'COMMENT',
   'MINUTES',
   'PTS',
   'CONTESTED_SHOTS',
   'CONTESTED_SHOTS_2PT',
   'CONTESTED_SHOTS_3PT',
   'DEFLECTIONS',
   'CHARGES_DRAWN',
   'SCREEN_ASSISTS',
   'SCREEN_AST_PTS',
   'OFF_LOOSE_BALLS_RECOVERED',
   'DEF_LOOSE_BALLS_RECOVERED',
   'LOOSE_BALLS_RECOVERED',
   'OFF_BOXOUTS',
   'DEF_BOXOUTS',
   'BOX_OUT_PLAYER_TEAM_REBS',
   'BOX_OUT_PLAYER_REBS',
   'BOX_OUTS'],
  'rowSet': [['0021900470',
    1610612754,
    'IND',
    'Indiana',
    203933,
    'T.J. Warren',
    'F',
    '',
    '30:58',
    16,
    4,
    1,
    3,
    2,
    1,
    0,
    0,
    1,
    0,
    1,
    0,
    3,
    1,
    1,
    3],
   ['0021900470',
    1610612754,
    'IND',
    'Indiana',
    1627734,
    'Domantas Sabon

In [29]:
type(r['resultSets']) # it's a list -- so how long is this list?

list

In [32]:
len(r['resultSets']) # let's look at each of those 3 things... see if there's something we want

3

In [33]:
# First item in the list of result sets
r['resultSets'][0]

{'name': 'HustleStatsAvailable',
 'headers': ['GAME_ID', 'HUSTLE_STATUS'],
 'rowSet': [['0021900470', 1]]}

In [34]:
r['resultSets'][1] # this looks like the PLAYER STATS.... headers and stats.

{'name': 'PlayerStats',
 'headers': ['GAME_ID',
  'TEAM_ID',
  'TEAM_ABBREVIATION',
  'TEAM_CITY',
  'PLAYER_ID',
  'PLAYER_NAME',
  'START_POSITION',
  'COMMENT',
  'MINUTES',
  'PTS',
  'CONTESTED_SHOTS',
  'CONTESTED_SHOTS_2PT',
  'CONTESTED_SHOTS_3PT',
  'DEFLECTIONS',
  'CHARGES_DRAWN',
  'SCREEN_ASSISTS',
  'SCREEN_AST_PTS',
  'OFF_LOOSE_BALLS_RECOVERED',
  'DEF_LOOSE_BALLS_RECOVERED',
  'LOOSE_BALLS_RECOVERED',
  'OFF_BOXOUTS',
  'DEF_BOXOUTS',
  'BOX_OUT_PLAYER_TEAM_REBS',
  'BOX_OUT_PLAYER_REBS',
  'BOX_OUTS'],
 'rowSet': [['0021900470',
   1610612754,
   'IND',
   'Indiana',
   203933,
   'T.J. Warren',
   'F',
   '',
   '30:58',
   16,
   4,
   1,
   3,
   2,
   1,
   0,
   0,
   1,
   0,
   1,
   0,
   3,
   1,
   1,
   3],
  ['0021900470',
   1610612754,
   'IND',
   'Indiana',
   1627734,
   'Domantas Sabonis',
   'F',
   '',
   '31:09',
   8,
   5,
   1,
   4,
   1,
   0,
   10,
   25,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0],
  ['0021900470',
   1610612754,
   'I

In [35]:
r['resultSets'][2] # this looks like the TEAM STATS.... headers and stats

{'name': 'TeamStats',
 'headers': ['GAME_ID',
  'TEAM_ID',
  'TEAM_NAME',
  'TEAM_ABBREVIATION',
  'TEAM_CITY',
  'MINUTES',
  'PTS',
  'CONTESTED_SHOTS',
  'CONTESTED_SHOTS_2PT',
  'CONTESTED_SHOTS_3PT',
  'DEFLECTIONS',
  'CHARGES_DRAWN',
  'SCREEN_ASSISTS',
  'SCREEN_AST_PTS',
  'OFF_LOOSE_BALLS_RECOVERED',
  'DEF_LOOSE_BALLS_RECOVERED',
  'LOOSE_BALLS_RECOVERED',
  'OFF_BOXOUTS',
  'DEF_BOXOUTS',
  'BOX_OUT_PLAYER_TEAM_REBS',
  'BOX_OUT_PLAYER_REBS',
  'BOX_OUTS'],
 'rowSet': [['0021900470',
   1610612754,
   'Pacers',
   'IND',
   'Indiana',
   '240:00',
   112,
   52,
   28,
   24,
   18,
   1,
   19,
   46,
   2,
   1,
   3,
   0,
   8,
   2,
   1,
   8],
  ['0021900470',
   1610612748,
   'Heat',
   'MIA',
   'Miami',
   '240:00',
   113,
   65,
   39,
   26,
   10,
   0,
   9,
   21,
   3,
   3,
   6,
   1,
   9,
   3,
   0,
   10]]}

#### Let's grab the player stats

- Goals
    1. We want to grab the `headers` (the column names)
    2. We want to grab the `rowSet` (the data)
    3. Make a Pandas DataFrame
    4. Export to CSV

In [36]:
# Keys available on the PlayerStats result set
r['resultSets'][1].keys()

dict_keys(['name', 'headers', 'rowSet'])

In [37]:
# Goal 1: the column names
r['resultSets'][1]['headers']

['GAME_ID',
 'TEAM_ID',
 'TEAM_ABBREVIATION',
 'TEAM_CITY',
 'PLAYER_ID',
 'PLAYER_NAME',
 'START_POSITION',
 'COMMENT',
 'MINUTES',
 'PTS',
 'CONTESTED_SHOTS',
 'CONTESTED_SHOTS_2PT',
 'CONTESTED_SHOTS_3PT',
 'DEFLECTIONS',
 'CHARGES_DRAWN',
 'SCREEN_ASSISTS',
 'SCREEN_AST_PTS',
 'OFF_LOOSE_BALLS_RECOVERED',
 'DEF_LOOSE_BALLS_RECOVERED',
 'LOOSE_BALLS_RECOVERED',
 'OFF_BOXOUTS',
 'DEF_BOXOUTS',
 'BOX_OUT_PLAYER_TEAM_REBS',
 'BOX_OUT_PLAYER_REBS',
 'BOX_OUTS']

In [38]:
# Goal 2: the data -- a list of lists, one inner list per player row
r['resultSets'][1]['rowSet']

[['0021900470',
  1610612754,
  'IND',
  'Indiana',
  203933,
  'T.J. Warren',
  'F',
  '',
  '30:58',
  16,
  4,
  1,
  3,
  2,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  3,
  1,
  1,
  3],
 ['0021900470',
  1610612754,
  'IND',
  'Indiana',
  1627734,
  'Domantas Sabonis',
  'F',
  '',
  '31:09',
  8,
  5,
  1,
  4,
  1,
  0,
  10,
  25,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 ['0021900470',
  1610612754,
  'IND',
  'Indiana',
  1626167,
  'Myles Turner',
  'C',
  '',
  '26:21',
  13,
  9,
  8,
  1,
  1,
  0,
  5,
  10,
  0,
  0,
  0,
  0,
  2,
  1,
  0,
  2],
 ['0021900470',
  1610612754,
  'IND',
  'Indiana',
  203087,
  'Jeremy Lamb',
  'G',
  '',
  '30:03',
  16,
  4,
  1,
  3,
  10,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 ['0021900470',
  1610612754,
  'IND',
  'Indiana',
  1628988,
  'Aaron Holiday',
  'G',
  '',
  '29:38',
  17,
  7,
  3,
  4,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 ['0021900470',
  1610612754,
  'IND',
  'Indiana',
  203200,

In [40]:
# pd.DataFrame takes the row DATA first and the column HEADERS second --
# be careful not to swap them.
player_stats = r['resultSets'][1]  # the 'PlayerStats' result set
df = pd.DataFrame(data=player_stats['rowSet'], columns=player_stats['headers'])

In [41]:
# Display the resulting DataFrame
df

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MINUTES,PTS,CONTESTED_SHOTS,CONTESTED_SHOTS_2PT,CONTESTED_SHOTS_3PT,DEFLECTIONS,CHARGES_DRAWN,SCREEN_ASSISTS,SCREEN_AST_PTS,OFF_LOOSE_BALLS_RECOVERED,DEF_LOOSE_BALLS_RECOVERED,LOOSE_BALLS_RECOVERED,OFF_BOXOUTS,DEF_BOXOUTS,BOX_OUT_PLAYER_TEAM_REBS,BOX_OUT_PLAYER_REBS,BOX_OUTS
0,21900470,1610612754,IND,Indiana,203933,T.J. Warren,F,,30:58,16,4,1,3,2,1,0,0,1,0,1,0,3,1,1,3
1,21900470,1610612754,IND,Indiana,1627734,Domantas Sabonis,F,,31:09,8,5,1,4,1,0,10,25,0,0,0,0,0,0,0,0
2,21900470,1610612754,IND,Indiana,1626167,Myles Turner,C,,26:21,13,9,8,1,1,0,5,10,0,0,0,0,2,1,0,2
3,21900470,1610612754,IND,Indiana,203087,Jeremy Lamb,G,,30:03,16,4,1,3,10,0,0,0,0,0,0,0,0,0,0,0
4,21900470,1610612754,IND,Indiana,1628988,Aaron Holiday,G,,29:38,17,7,3,4,0,0,0,0,0,0,0,0,0,0,0,0
5,21900470,1610612754,IND,Indiana,203200,Justin Holiday,,,30:14,11,8,4,4,1,0,3,8,0,1,1,0,2,0,0,2
6,21900470,1610612754,IND,Indiana,204456,T.J. McConnell,,,18:25,6,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0
7,21900470,1610612754,IND,Indiana,1629048,Goga Bitadze,,,4:10,0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0
8,21900470,1610612754,IND,Indiana,1628410,Edmond Sumner,,,17:54,11,6,4,2,3,0,1,3,0,0,0,0,0,0,0,0
9,21900470,1610612754,IND,Indiana,203926,Doug McDermott,,,21:08,14,2,0,2,0,0,0,0,1,0,1,0,1,0,0,1


In [None]:
# Goal 4: export the player stats to a CSV file in the current working directory.
# index=False leaves the DataFrame's row index out of the file.
df.to_csv('hustlestatsboxscore_0021900470.csv', index=False)