# Chapter 15: The Sports Analytics case study

In [13]:
import json
from urllib import request

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

## Get the data

In [14]:
shots_url = 'https://www.murach.com/python_analysis/shots.json'
# urlretrieve returns a (local_filename, headers) tuple, not the shot data,
# so it is bound to descriptive names instead of `shots`; the saved
# 'shots.json' file is what gets loaded afterwards.
shots_file, response_headers = request.urlretrieve(shots_url,
                                                   filename='shots.json')

In [15]:
# Read the downloaded file as UTF-8 explicitly so that decoding does not
# depend on the platform's default text encoding.
with open('shots.json', encoding='utf-8') as json_file:
    shots = json.load(json_file)
shots.keys()

dict_keys(['resource', 'parameters', 'resultSets'])

In [16]:
# Lower-cased header names of the first result set, used as column names
columnHeaders = [
    header.lower()
    for header in shots['resultSets'][0]['headers']
]
columnHeaders

['grid_type',
 'game_id',
 'game_event_id',
 'player_id',
 'player_name',
 'team_id',
 'team_name',
 'period',
 'minutes_remaining',
 'seconds_remaining',
 'event_type',
 'action_type',
 'shot_type',
 'shot_zone_basic',
 'shot_zone_area',
 'shot_zone_range',
 'shot_distance',
 'loc_x',
 'loc_y',
 'shot_attempted_flag',
 'shot_made_flag',
 'game_date',
 'htm',
 'vtm']

In [17]:
# The rowSet of the first result set; it is passed below as the DataFrame
# data, with columnHeaders supplying the column names.
rows = shots['resultSets'][0]['rowSet']

In [18]:
# Build the shots table from the row values and the lower-cased headers
df = pd.DataFrame(rows, columns=columnHeaders)
df.head(4)

Unnamed: 0,grid_type,game_id,game_event_id,player_id,player_name,team_id,team_name,period,minutes_remaining,seconds_remaining,...,shot_zone_area,shot_zone_range,shot_distance,loc_x,loc_y,shot_attempted_flag,shot_made_flag,game_date,htm,vtm
0,Shot Chart Detail,20900015,4,201939,Stephen Curry,1610612744,Golden State Warriors,1,11,25,...,Right Side Center(RC),24+ ft.,26,99,249,1,0,20091028,GSW,HOU
1,Shot Chart Detail,20900015,17,201939,Stephen Curry,1610612744,Golden State Warriors,1,9,31,...,Left Side Center(LC),16-24 ft.,18,-122,145,1,1,20091028,GSW,HOU
2,Shot Chart Detail,20900015,53,201939,Stephen Curry,1610612744,Golden State Warriors,1,6,2,...,Center(C),8-16 ft.,14,-60,129,1,0,20091028,GSW,HOU
3,Shot Chart Detail,20900015,141,201939,Stephen Curry,1610612744,Golden State Warriors,2,9,49,...,Left Side(L),16-24 ft.,19,-172,82,1,0,20091028,GSW,HOU


## Clean the data

In [19]:
# List the distinct period numbers that occur in the data
df['period'].unique()

array([1, 2, 4, 3, 5, 6], dtype=int64)

In [20]:
# Keep periods 1-4 only; periods 5 and 6 also occur in the data (presumably
# overtime - confirm). The explicit copy makes df an independent frame, so
# the column assignments in later cells do not raise SettingWithCopyWarning.
df = df.query('period < 5.0').copy()

In [21]:
# Count the distinct values in every column; dropna=False makes NaN count
# as a value too.
df.nunique(dropna=False)

grid_type                1
game_id                692
game_event_id          692
player_id                1
player_name              1
team_id                  1
team_name                1
period                   4
minutes_remaining       12
seconds_remaining       60
event_type               2
action_type             51
shot_type                2
shot_zone_basic          7
shot_zone_area           6
shot_zone_range          5
shot_distance           71
loc_x                  489
loc_y                  437
shot_attempted_flag      1
shot_made_flag           2
game_date              692
htm                     32
vtm                     32
dtype: int64

In [22]:
# Drop everything except game_id, shot_type, loc_x, loc_y, shot_made_flag
# and game_date.
shots = df.drop(columns=[
    # record, player and team identifiers
    'grid_type', 'game_event_id', 'player_id', 'player_name',
    'team_id', 'team_name',
    # game clock
    'period', 'minutes_remaining', 'seconds_remaining',
    # shot description
    'event_type', 'action_type', 'shot_zone_basic', 'shot_zone_area',
    'shot_zone_range', 'shot_distance', 'shot_attempted_flag',
    # team abbreviations
    'htm', 'vtm',
])

In [23]:
# game_date arrives as YYYYMMDD values (e.g. 20091028). Parsing with an
# explicit format avoids relying on pandas' format inference and keeps the
# conversion correct whether the raw values are strings or integers.
df['game_date'] = pd.to_datetime(df['game_date'], format='%Y%m%d')

## Prepare the data

In [25]:
# Preview the first three rows of the trimmed frame.
# NOTE(review): `shots` was derived from `df` before game_date was converted
# on `df`, so game_date still shows the raw YYYYMMDD value here, and the
# following cells keep working on `df` rather than `shots` - confirm intent.
shots.head(3)

Unnamed: 0,game_id,shot_type,loc_x,loc_y,shot_made_flag,game_date
0,20900015,3PT Field Goal,99,249,0,20091028
1,20900015,2PT Field Goal,-122,145,1,20091028
2,20900015,2PT Field Goal,-60,129,0,20091028


In [26]:
# Index the shots by game_id. Rebinding the returned frame (instead of
# inplace=True) avoids mutating an object that earlier cells displayed.
df = df.set_index('game_id')

In [32]:
def get_season(row):
    """Return the 'YYYY-YYYY' season label for a row's game_date.

    Dates in July through December belong to the season that starts in that
    calendar year; dates in January through June belong to the season that
    started the previous year.
    """
    game_date = row.game_date
    start_year = game_date.year if game_date.month > 6 else game_date.year - 1
    return f'{start_year}-{start_year + 1}'

# Label every shot with its season by applying get_season to each row
df['season'] = df.apply(get_season, axis='columns')
df.head()

Unnamed: 0_level_0,grid_type,game_event_id,player_id,player_name,team_id,team_name,period,minutes_remaining,seconds_remaining,event_type,...,shot_zone_range,shot_distance,loc_x,loc_y,shot_attempted_flag,shot_made_flag,game_date,htm,vtm,season
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20900015,Shot Chart Detail,4,201939,Stephen Curry,1610612744,Golden State Warriors,1,11,25,Missed Shot,...,24+ ft.,26,99,249,1,0,2009-10-28,GSW,HOU,2009-2010
20900015,Shot Chart Detail,17,201939,Stephen Curry,1610612744,Golden State Warriors,1,9,31,Made Shot,...,16-24 ft.,18,-122,145,1,1,2009-10-28,GSW,HOU,2009-2010
20900015,Shot Chart Detail,53,201939,Stephen Curry,1610612744,Golden State Warriors,1,6,2,Missed Shot,...,8-16 ft.,14,-60,129,1,0,2009-10-28,GSW,HOU,2009-2010
20900015,Shot Chart Detail,141,201939,Stephen Curry,1610612744,Golden State Warriors,2,9,49,Missed Shot,...,16-24 ft.,19,-172,82,1,0,2009-10-28,GSW,HOU,2009-2010
20900015,Shot Chart Detail,249,201939,Stephen Curry,1610612744,Golden State Warriors,2,2,19,Missed Shot,...,16-24 ft.,16,-68,148,1,0,2009-10-28,GSW,HOU,2009-2010


In [33]:
# Translate the 0/1 flag into a readable 'Missed'/'Made' label
df['shot_result'] = df['shot_made_flag'].replace({0: 'Missed', 1: 'Made'})

In [34]:
# A miss scores 0; any other result scores 3 for a '3PT Field Goal' and 2
# for every other shot type.
df['points_made'] = df.apply(
    lambda shot: ((3 if shot.shot_type == '3PT Field Goal' else 2)
                  if shot.shot_result != 'Missed' else 0),
    axis='columns')

In [None]:
# Total points per game, repeated on every shot row of that game
df['points_made_game'] = df.groupby('game_id')['points_made'].transform('sum')

In [None]:
# Number of non-null shot_made_flag values per game, repeated on every row
df['shots_attempted'] = df.groupby('game_id')['shot_made_flag'].transform('count')

In [None]:
# Sum of the 0/1 shot_made_flag per game, repeated on every row
df['shots_made'] = df.groupby('game_id')['shot_made_flag'].transform('sum')

In [None]:
# Show the per-shot and per-game summary columns side by side
df.loc[:, [
    'shot_type',
    'points_made',
    'points_made_game',
    'shots_attempted',
    'shots_made',
]]

## Plot the summary data

## Plot the shots for two games

In [24]:
# SOURCE: http://savvastjortjoglou.com/nba-shot-sharts.html
from matplotlib.patches import Circle, Rectangle, Arc
def draw_court(ax=None, color='black', lw=2, outer_lines=False):
    """Draw the markings of an NBA half court on a matplotlib axes.

    The court is laid out in tenths of a foot with the hoop centred on
    (0, 0); for example, the 16 ft wide paint spans 160 units.

    Parameters
    ----------
    ax : matplotlib axes, optional
        Axes to draw on. When None, the current axes (``plt.gca()``) is used.
    color : str, default 'black'
        Colour applied to every court element.
    lw : number, default 2
        Line width applied to every court element.
    outer_lines : bool, default False
        When true, also draw the rectangle formed by the half-court line,
        the baseline and the side out-of-bounds lines.

    Returns
    -------
    The axes object the court elements were added to.
    """
    # If an axes object isn't provided to plot onto, just get the current one
    if ax is None:
        ax = plt.gca()

    # Line styling shared by every court element
    line = dict(linewidth=lw, color=color)

    # Create the basketball hoop
    # Diameter of a hoop is 18" so it has a radius of 9", which is a value
    # of 7.5 in our coordinate system
    hoop = Circle((0, 0), radius=7.5, fill=False, **line)

    # Create the backboard (no fill=False, so it is drawn with the default
    # patch fill)
    backboard = Rectangle((-30, -7.5), 60, -1, **line)

    # The paint
    # Create the outer box of the paint, width=16ft, height=19ft
    outer_box = Rectangle((-80, -47.5), 160, 190, fill=False, **line)
    # Create the inner box of the paint, width=12ft, height=19ft
    inner_box = Rectangle((-60, -47.5), 120, 190, fill=False, **line)

    # Create the free throw top arc
    top_free_throw = Arc((0, 142.5), 120, 120, theta1=0, theta2=180,
                         fill=False, **line)
    # Create the free throw bottom arc (dashed)
    bottom_free_throw = Arc((0, 142.5), 120, 120, theta1=180, theta2=0,
                            linestyle='dashed', **line)
    # Restricted zone: an arc with a 4ft radius from the center of the hoop
    restricted = Arc((0, 0), 80, 80, theta1=0, theta2=180, **line)

    # Three point line
    # Create the side 3pt lines, they are 14ft long before they begin to arc;
    # zero-width rectangles are used to draw them as straight lines
    corner_three_a = Rectangle((-220, -47.5), 0, 140, **line)
    corner_three_b = Rectangle((220, -47.5), 0, 140, **line)
    # 3pt arc - center of the arc is the hoop, arc is 23'9" away from the hoop.
    # The theta values were adjusted by hand until the arc lined up with the
    # corner threes.
    three_arc = Arc((0, 0), 475, 475, theta1=22, theta2=158, **line)

    # Center court
    center_outer_arc = Arc((0, 422.5), 120, 120, theta1=180, theta2=0,
                           **line)
    center_inner_arc = Arc((0, 422.5), 40, 40, theta1=180, theta2=0, **line)

    # List of the court elements to be plotted onto the axes
    court_elements = [hoop, backboard, outer_box, inner_box, top_free_throw,
                      bottom_free_throw, restricted, corner_three_a,
                      corner_three_b, three_arc, center_outer_arc,
                      center_inner_arc]

    if outer_lines:
        # Draw the half court line, baseline and side out-of-bounds lines.
        # A separate local name is used so the outer_lines flag is not
        # overwritten by the patch object.
        boundary = Rectangle((-250, -47.5), 500, 470, fill=False, **line)
        court_elements.append(boundary)

    # Add the court elements onto the axes
    for element in court_elements:
        ax.add_patch(element)

    return ax

## Plot shot data for two seasons

## Plot shot density for one season

## Plot shot density for two seasons