## Data collection and preprocessing

In [2]:
import requests
import os
import pandas as pd
from pandas.io.json import json_normalize

In [3]:
# Connection settings for the free NBA API (hosted on RapidAPI)
XRapid_api_host = "free-nba.p.rapidapi.com"
XRapid_api_key = os.getenv('Rapid_API_KEY')  # None when the variable is not set
free_nba_api_endpoint = "https://{}/games".format(XRapid_api_host)

# Season used for the download query and the plot title
SEASON = 2018

In [4]:
%%time
# Collect all data in a loop
def _get_response(params, page=None, timeout=30):
    """Fetch one page of games from the free NBA API.

    Args:
        params: Query-string parameters for the request. The dict is copied,
            so the caller's object is left untouched.
        page: Page number to request. When falsy, no ``page`` parameter is
            added to the query.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A ``(data, next_page)`` tuple: the ``data`` entry of the JSON body and
        the value of ``meta.next_page``, or ``False`` when that value is falsy.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    # Work on a copy: updating the caller's dict would leak the page number
    # into every later call made with the same params object.
    query = dict(params)
    if page:
        query['page'] = page

    res = requests.get(free_nba_api_endpoint,
                       params=query,
                       headers={
                           "X-RapidAPI-Host": XRapid_api_host,
                           "X-RapidAPI-Key": XRapid_api_key
                       },
                       timeout=timeout
                      )
    # Surface HTTP failures here instead of as a KeyError / JSON error below.
    res.raise_for_status()

    content_dict = res.json()
    # Normalise any falsy marker (None, 0, ...) to False for the caller's loop.
    next_page = content_dict['meta']['next_page'] or False
    return content_dict['data'], next_page

# Walk the paginated endpoint until no further page is reported
params = {'seasons[]': SEASON, 'per_page': 100}
results = []
next_page = None  # None => the first request carries no page parameter

while True:
    res, next_page = _get_response(params, page=next_page)
    results += res
    if not next_page:
        break

CPU times: user 401 ms, sys: 48.6 ms, total: 449 ms
Wall time: 11.9 s


In [5]:
# Convert data into pandas dataframe
def nba_data_processing(nba_data_dict):
    """Flatten nested game records into a pandas DataFrame.

    Args:
        nba_data_dict: A record (dict) or list of records. Nested dicts are
            flattened into columns whose names join the key path with ``_``
            (e.g. ``{'home_team': {'id': 2}}`` becomes ``home_team_id``).

    Returns:
        pandas.DataFrame with one row per record.
    """
    # json_normalize already returns a DataFrame, so no further conversion is needed.
    return json_normalize(nba_data_dict, sep='_')

In [6]:
# Build the working DataFrame from the records collected from the API
df = nba_data_processing(results)

In [7]:
# Reformat date: drop the constant 'T00:00:00.000Z' suffix so only YYYY-MM-DD remains.
# The vectorized .str accessor replaces the row-wise apply (and also works on an
# empty frame); regex=False makes the '.' characters match literally.
df['date'] = df['date'].str.replace('T00:00:00.000Z', '', regex=False)

In [8]:
# Preview the first rows to check the flattened columns and the reformatted date
df.head()

Unnamed: 0,date,home_team_abbreviation,home_team_city,home_team_conference,home_team_division,home_team_full_name,home_team_id,home_team_name,home_team_score,id,...,status,time,visitor_team_abbreviation,visitor_team_city,visitor_team_conference,visitor_team_division,visitor_team_full_name,visitor_team_id,visitor_team_name,visitor_team_score
0,2019-01-30,BOS,Boston,East,Atlantic,Boston Celtics,2,Celtics,126,47179,...,Final,,CHA,Charlotte,East,Southeast,Charlotte Hornets,4,Hornets,94
1,2019-02-09,BOS,Boston,East,Atlantic,Boston Celtics,2,Celtics,112,48751,...,Final,,LAC,LA,West,Pacific,LA Clippers,13,Clippers,123
2,2019-02-08,PHI,Philadelphia,East,Atlantic,Philadelphia 76ers,23,76ers,117,48739,...,Final,,DEN,Denver,West,Northwest,Denver Nuggets,8,Nuggets,110
3,2019-02-08,WAS,Washington,East,Southeast,Washington Wizards,30,Wizards,119,48740,...,Final,,CLE,Cleveland,East,Central,Cleveland Cavaliers,6,Cavaliers,106
4,2019-02-08,SAC,Sacramento,West,Pacific,Sacramento Kings,26,Kings,102,48746,...,Final,,MIA,Miami,East,Southeast,Miami Heat,16,Heat,96


In [9]:
# Summarise column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1311 entries, 0 to 1310
Data columns (total 23 columns):
date                         1311 non-null object
home_team_abbreviation       1311 non-null object
home_team_city               1311 non-null object
home_team_conference         1311 non-null object
home_team_division           1311 non-null object
home_team_full_name          1311 non-null object
home_team_id                 1311 non-null int64
home_team_name               1311 non-null object
home_team_score              1311 non-null int64
id                           1311 non-null int64
period                       1311 non-null int64
postseason                   1311 non-null bool
season                       1311 non-null int64
status                       1311 non-null object
time                         1311 non-null object
visitor_team_abbreviation    1311 non-null object
visitor_team_city            1311 non-null object
visitor_team_conference      1311 non-null object


## Plot all-team results

In [10]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool
from bokeh.io import reset_output, output_notebook
import numpy as np
from bokeh.layouts import gridplot
from bokeh.models import BoxSelectTool, LassoSelectTool
from bokeh.plotting import figure, curdoc

In [11]:
def _conference_source(home_conference, visitor_conference):
    """Build a ColumnDataSource with the games of one home/visitor conference pairing."""
    matchup = ((df.home_team_conference == home_conference)
               & (df.visitor_team_conference == visitor_conference))
    return ColumnDataSource(df[matchup])


source_east_both = _conference_source('East', 'East')
source_west_both = _conference_source('West', 'West')
# "A @ B" reads "A visits B": the conference after the @ is the home side.
source_west_at_east = _conference_source('East', 'West')
# Home side must be West here. This filter used to repeat the West @ East one,
# so the "East @ West" series showed the wrong games.
source_east_at_west = _conference_source('West', 'East')

TOOLS="pan,wheel_zoom,box_select,lasso_select,reset"
output_notebook()
p = figure(tools=TOOLS, plot_width=600, plot_height=600, min_border=10, min_border_left=50,
           toolbar_location="above")

p.background_fill_color = "#fafafa"

# One scatter series per conference pairing: (source, color, alpha, legend text)
scatter_series = [
    (source_east_both, 'blue', 0.6, "Both East teams"),
    (source_west_both, 'red', 0.7, "Both West teams"),
    (source_west_at_east, 'green', 0.7, "West @ East"),
    (source_east_at_west, 'yellow', 0.7, "East @ West"),
]
for series_source, series_color, series_alpha, series_label in scatter_series:
    p.scatter(x='home_team_score', y='visitor_team_score',
              source=series_source,
              size=8, alpha=series_alpha, color=series_color, legend=series_label)

p.legend.location = "top_left"
# Clicking a legend entry toggles the visibility of that series
p.legend.click_policy="hide"

p.title.text = 'All team scores in Season '+str(SEASON)
p.xaxis.axis_label = 'Home team score'
p.yaxis.axis_label = 'Visitor team score'

# Tooltip fields are looked up by column name in the hovered point's data source
hover = HoverTool()
hover.tooltips=[
    ('Home Team', '@home_team_full_name'),
    ('Home Team Score', '@home_team_score'),
#     ('Visitor Team', '@visitor_team_full_name'),
    ('Visitor Team Score', '@visitor_team_score'),
    ('Date', '@date')
]

p.add_tools(hover)

show(p)