## Testing out some code to hit the NHL API :)


Resources

* https://towardsdatascience.com/nhl-analytics-with-python-6390c5d3206d
* https://nhl-api-explorer.herokuapp.com/
* https://gitlab.com/dword4/nhlapi
* https://gitlab.com/dword4/nhlapi/-/blob/master/stats-api.md#game-ids
* https://github.com/dword4/nhlapi

Game IDs
* The first 4 digits identify the season of the game (ie. 2017 for the 2017-2018 season). 
* The next 2 digits give the type of game, where 01 = preseason, 02 = regular season, 03 = playoffs, 04 = all-star. 
* The final 4 digits identify the specific game number. 
* For regular season and preseason games, this ranges from 0001 to the number of games played. (1271 for seasons with 31 teams (2017 and onwards) and 1230 for seasons with 30 teams). 
* For playoff games, the 2nd digit of the specific number gives the round of the playoffs, the 3rd digit specifies the matchup, and the 4th digit specifies the game (out of 7).

In [235]:
# Load dependencies
import requests
import pandas as pd
import json
import os
import pathlib
import numpy as np

In [7]:
# Set up the API call variables
year = '2021'
season_type = '02' 
max_game_id = 1290
boxscore_url = '/boxscore'
livefeed_url = '/feed/live'
game_url = 'http://statsapi.web.nhl.com/api/v1/game/'

In [8]:
# Test hitting the boxscore and live feed endpoints, save as JSON files
def request_get_data(url, 
                    year, 
                    season_type, 
                    max_game_id, 
                    endpoint,
                    empty_list = [],
                    zfill = 4, 
                    multiple_games = True):

    # If multiple_games param is true, the get data for n number of games (uses for loop)
    if multiple_games == True:    
        for i in range(1, max_game_id, 1):
            game_id = str(year) + str(season_type) + str(i).zfill(zfill)
            game_id_endpoint = str(url) + game_id + str(endpoint)
            r = requests.get(game_id_endpoint)
            data = r.json()
            data['game_id'] = game_id
            empty_list.append(data)

    # If multiple_games param is not true, then get data for the 1 game (no for loop)
    else:
        game_id = str(year) + str(season_type) + str(max_game_id).zfill(zfill)
        game_id_endpoint = str(url) +  game_id + str(endpoint)
        r = requests.get(game_id_endpoint)
        data = r.json()
        data['game_id'] = game_id
        empty_list.append(data)

    # Rename to game_data_json
    game_data_list = empty_list

    return game_data_list

# Function that takes in a list, converts to json, then saves a copy of that json file in the local folder
def save_as_json(list, 
                 file_name = 'data'):

    # Save as json file in same folder the notebook is run (relative)
    with open(f'{file_name}.json', 'w') as f:
        json.dump(list, f)

    # Convert list to json
    j_data = json.dumps(list)
 
    
    return j_data

In [9]:
# Create a folder to store the data
def mk_dir(rel_path = "test_data/"):
    try:
        #os.makedirs(path, exist_ok=True)
        pathlib.Path(rel_path).mkdir(parents=True, exist_ok=True) 

    except OSError:
        print ("Creation of the directory %s failed" % rel_path)
    else:
        print ("Successfully created the directory %s " % rel_path)

In [18]:
# Set inputs & prepare directory to story data
mk_dir(rel_path = "test_data/")
single_game_id = 10

# Boxscore
boxscore_list = request_get_data(game_url, 
                                year, 
                                season_type, 
                                single_game_id, 
                                boxscore_url,
                                multiple_games = True)

boxscore_json = save_as_json(boxscore_list, 'test_data/boxscore')

# Livefeed
livefeed_list = request_get_data(game_url, 
                                year, 
                                season_type, 
                                single_game_id, 
                                livefeed_url,
                                multiple_games = True)
livefeed_json = save_as_json(livefeed_list, 'test_data/livefeed')

Successfully created the directory test_data/ 


In [11]:
# Uncomment the below code to print the game ids used to get each game
'''
max_game_id = 1290

#z_id = str(game_url) + str(year) + str(season_type) + str(max_game_id).zfill(4) + str(boxscore_url)
#for i in range(0, max_game_id, 1):
#    z_id = str(game_url) + str(year) + str(season_type) + str(i).zfill(4) + str(boxscore_url)
#    print(f'game: {i}, game_id: {z_id}')
'''

"\nmax_game_id = 1290\n\n#z_id = str(game_url) + str(year) + str(season_type) + str(max_game_id).zfill(4) + str(boxscore_url)\n#for i in range(0, max_game_id, 1):\n#    z_id = str(game_url) + str(year) + str(season_type) + str(i).zfill(4) + str(boxscore_url)\n#    print(f'game: {i}, game_id: {z_id}')\n"

In [12]:
# Setup json object
json_object = json.loads(boxscore_json)

# Set up iterable variables
games = len(json_object)
#player_id = list(json_object[0]['teams']['home']['players'].keys())
raw = []

# Loop over every game
for game in range(0, (games), 1):
    game_id = json_object[game]["game_id"]

    # Loop over each home player in each team
    for player in list(json_object[game]['teams']['home']['players'].keys()):
        player_data = json_object[game]['teams']['home']['players'][player]['person']
        player_data ["home_team"] = 1
        player_data["pid"] = player
        player_data["game_id"] = game_id
        raw.append(player_data)

    # Loop over each away player in each team
    for player in list(json_object[game]['teams']['away']['players'].keys()):
        player_data = json_object[game]['teams']['away']['players'][player]['person']
        player_data ["home_team"] = 0
        player_data["pid"] = player
        player_data["game_id"] = game_id
        raw.append(player_data)

# Convert to pandas dataframe
df1 = pd.DataFrame.from_dict(raw)
df1 = df1.rename(columns={"id": "pid_num"})


# Flatten currentTeam
df2 = pd.concat([df1.drop(['currentTeam'], axis=1), df1['currentTeam'].apply(pd.Series)], axis=1)
df2 = df2.rename(columns={"id": "team_id", "name": "team_name", "link": "team_link"})

# Flatten primaryPosition
df2 = pd.concat([df2.drop(['primaryPosition'], axis=1), df2['primaryPosition'].apply(pd.Series)], axis=1)
df2 = df2.rename(columns={"code": "pos_code", "name": "pos_name", "type": "pos_type", "abbreviation": "pos_abbr"})

# Copy to player_stats
player_data = df2

# See the data
player_data.head(3)

Unnamed: 0,pid_num,fullName,team_link,firstName,lastName,primaryNumber,birthDate,currentAge,birthCity,birthCountry,...,pid,game_id,birthStateProvince,team_id,team_name,team_link.1,pos_code,pos_name,pos_type,pos_abbr
0,8480172,Jan Rutta,/api/v1/people/8480172,Jan,Rutta,44,1990-07-29,31,Pisek,CZE,...,ID8480172,2021020001,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D
1,8478519,Anthony Cirelli,/api/v1/people/8478519,Anthony,Cirelli,71,1997-07-15,24,Etobicoke,CAN,...,ID8478519,2021020001,ON,14,Tampa Bay Lightning,/api/v1/teams/14,C,Center,Forward,C
2,8478416,Erik Cernak,/api/v1/people/8478416,Erik,Cernak,81,1997-05-28,24,Kosice,SVK,...,ID8478416,2021020001,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D


In [13]:
#df2.groupby('team_name').count()

In [14]:
# Setup json object
json_object = json.loads(boxscore_json)

# Set up iterable variables
games = len(json_object)
#player_id = list(json_object[0]['teams']['home']['players'].keys())
raw = []

# Loop over every game
for game in range(0, (games), 1):
    game_id = json_object[game]["game_id"]

    # Loop over each home player in each team
    for player in list(json_object[game]['teams']['home']['players'].keys()):
        player_stats = json_object[game]['teams']['home']['players'][player]['stats']
        player_stats ["home_team"] = 1
        player_stats["pid"] = player
        player_stats["game_id"] = game_id
        raw.append(player_stats)

    # Loop over each away player in each team
    for player in list(json_object[game]['teams']['away']['players'].keys()):
        player_stats = json_object[game]['teams']['away']['players'][player]['stats']
        player_stats ["home_team"] = 0
        player_stats["pid"] = player
        player_stats["game_id"] = game_id
        raw.append(player_stats)

# Convert to pandas dataframe
df1 = pd.DataFrame.from_dict(raw)
df1.head(3)

# Flatten the pandas dataframe
df1 = pd.json_normalize(raw, sep='_')

# Copy to player_stats
player_stats = df1

# Show first few rows
player_stats.head(3)

Unnamed: 0,home_team,pid,game_id,skaterStats_timeOnIce,skaterStats_assists,skaterStats_goals,skaterStats_shots,skaterStats_hits,skaterStats_powerPlayGoals,skaterStats_powerPlayAssists,...,goalieStats_shortHandedSaves,goalieStats_evenSaves,goalieStats_shortHandedShotsAgainst,goalieStats_evenShotsAgainst,goalieStats_powerPlayShotsAgainst,goalieStats_decision,goalieStats_savePercentage,goalieStats_evenStrengthSavePercentage,goalieStats_powerPlaySavePercentage,goalieStats_shortHandedSavePercentage
0,1,ID8480172,2021020001,15:55,0.0,0.0,3.0,0.0,0.0,0.0,...,,,,,,,,,,
1,1,ID8478519,2021020001,17:38,1.0,1.0,2.0,3.0,0.0,0.0,...,,,,,,,,,,
2,1,ID8478416,2021020001,20:16,0.0,0.0,4.0,2.0,0.0,0.0,...,,,,,,,,,,


In [15]:
player_boxscore =  pd.merge(left = player_stats, 
                            right = player_data, 
                            how = 'left',
                            left_on = ['pid', 'game_id', 'home_team'],
                            right_on = ['pid', 'game_id', 'home_team'])

player_boxscore.head(3)

Unnamed: 0,home_team,pid,game_id,skaterStats_timeOnIce,skaterStats_assists,skaterStats_goals,skaterStats_shots,skaterStats_hits,skaterStats_powerPlayGoals,skaterStats_powerPlayAssists,...,shootsCatches,rosterStatus,birthStateProvince,team_id,team_name,team_link,pos_code,pos_name,pos_type,pos_abbr
0,1,ID8480172,2021020001,15:55,0.0,0.0,3.0,0.0,0.0,0.0,...,R,Y,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D
1,1,ID8478519,2021020001,17:38,1.0,1.0,2.0,3.0,0.0,0.0,...,L,Y,ON,14,Tampa Bay Lightning,/api/v1/teams/14,C,Center,Forward,C
2,1,ID8478416,2021020001,20:16,0.0,0.0,4.0,2.0,0.0,0.0,...,R,Y,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D


In [16]:
player_boxscore.columns

Index(['home_team', 'pid', 'game_id', 'skaterStats_timeOnIce',
       'skaterStats_assists', 'skaterStats_goals', 'skaterStats_shots',
       'skaterStats_hits', 'skaterStats_powerPlayGoals',
       'skaterStats_powerPlayAssists', 'skaterStats_penaltyMinutes',
       'skaterStats_faceOffWins', 'skaterStats_faceoffTaken',
       'skaterStats_takeaways', 'skaterStats_giveaways',
       'skaterStats_shortHandedGoals', 'skaterStats_shortHandedAssists',
       'skaterStats_blocked', 'skaterStats_plusMinus',
       'skaterStats_evenTimeOnIce', 'skaterStats_powerPlayTimeOnIce',
       'skaterStats_shortHandedTimeOnIce', 'skaterStats_faceOffPct',
       'goalieStats_timeOnIce', 'goalieStats_assists', 'goalieStats_goals',
       'goalieStats_pim', 'goalieStats_shots', 'goalieStats_saves',
       'goalieStats_powerPlaySaves', 'goalieStats_shortHandedSaves',
       'goalieStats_evenSaves', 'goalieStats_shortHandedShotsAgainst',
       'goalieStats_evenShotsAgainst', 'goalieStats_powerPlayShotsAga

In [17]:
mini_player_boxscore = player_boxscore[['pid', 'game_id','fullName', 'pos_type', 'pos_name', 'rookie', 'home_team', 'team_id',
       'team_name', 'skaterStats_timeOnIce',
       'skaterStats_assists', 'skaterStats_goals', 'skaterStats_shots',
       'skaterStats_hits', 'skaterStats_powerPlayGoals',
       'skaterStats_powerPlayAssists', 'skaterStats_penaltyMinutes',
       'skaterStats_faceOffWins', 'skaterStats_faceoffTaken',
       'skaterStats_takeaways', 'skaterStats_giveaways',
       'skaterStats_shortHandedGoals', 'skaterStats_shortHandedAssists',
       'skaterStats_blocked', 'skaterStats_plusMinus',
       'skaterStats_evenTimeOnIce', 'skaterStats_powerPlayTimeOnIce',
       'skaterStats_shortHandedTimeOnIce', 'skaterStats_faceOffPct']]

mini_player_boxscore.head(5)

Unnamed: 0,pid,game_id,fullName,pos_type,pos_name,rookie,home_team,team_id,team_name,skaterStats_timeOnIce,...,skaterStats_takeaways,skaterStats_giveaways,skaterStats_shortHandedGoals,skaterStats_shortHandedAssists,skaterStats_blocked,skaterStats_plusMinus,skaterStats_evenTimeOnIce,skaterStats_powerPlayTimeOnIce,skaterStats_shortHandedTimeOnIce,skaterStats_faceOffPct
0,ID8480172,2021020001,Jan Rutta,Defenseman,Defenseman,False,1,14,Tampa Bay Lightning,15:55,...,0.0,0.0,0.0,0.0,1.0,0.0,15:55,0:00,0:00,
1,ID8478519,2021020001,Anthony Cirelli,Forward,Center,False,1,14,Tampa Bay Lightning,17:38,...,1.0,0.0,0.0,0.0,0.0,0.0,16:06,0:27,1:05,66.67
2,ID8478416,2021020001,Erik Cernak,Defenseman,Defenseman,False,1,14,Tampa Bay Lightning,20:16,...,0.0,0.0,0.0,0.0,0.0,-1.0,19:37,0:00,0:39,
3,ID8474567,2021020001,Zach Bogosian,Defenseman,Defenseman,False,1,14,Tampa Bay Lightning,14:14,...,0.0,1.0,0.0,0.0,0.0,-2.0,14:14,0:00,0:00,
4,ID8470621,2021020001,Corey Perry,Forward,Right Wing,False,1,14,Tampa Bay Lightning,12:40,...,0.0,0.0,0.0,0.0,1.0,-2.0,12:13,0:27,0:00,


Fiddling with the livefeed api...

In [None]:
# Setup json object
json_object = json.loads(livefeed_json)

# Set up iterable variables
games = len(json_object)
#player_id = list(json_object[0]['teams']['home']['players'].keys())
raw = []

# Loop over every game
for game in range(0, (games), 1):
    game_id = json_object[game]["game_id"]

    # Loop over each home player in each team
    for player in list(json_object[game]['teams']['home']['players'].keys()):
        player_data = json_object[game]['teams']['home']['players'][player]['person']
        player_data["home_team"] = 1
        player_data["pid"] = player
        player_data["game_id"] = game_id
        raw.append(player_data)

    # Loop over each away player in each team
    for player in list(json_object[game]['teams']['away']['players'].keys()):
        player_data = json_object[game]['teams']['away']['players'][player]['person']
        player_data["home_team"] = 0
        player_data["pid"] = player
        player_data["game_id"] = game_id
        raw.append(player_data)

# Convert to pandas dataframe
df1 = pd.DataFrame.from_dict(raw)
df1 = df1.rename(columns={"id": "pid_num"})


# Flatten currentTeam
df2 = pd.concat([df1.drop(['currentTeam'], axis=1), df1['currentTeam'].apply(pd.Series)], axis=1)
df2 = df2.rename(columns={"id": "team_id", "name": "team_name", "link": "team_link"})

# Flatten primaryPosition
df2 = pd.concat([df2.drop(['primaryPosition'], axis=1), df2['primaryPosition'].apply(pd.Series)], axis=1)
df2 = df2.rename(columns={"code": "pos_code", "name": "pos_name", "type": "pos_type", "abbreviation": "pos_abbr"})

# Copy to player_stats
player_data = df2

# See the data
player_data.head(3)

In [20]:
json_object = json.loads(livefeed_json)

# Set up iterable variables
games = len(json_object)

games

26

In [143]:
# For each play resulting in a shot or goal, get the player name, event type, and the x y coordinates of the shot
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#

# Setup json object
json_object = json.loads(livefeed_json)

# Set up iterable objects
games = len(json_object)
event_types = ['Shot','Goal']
raw_d = {"game_id":[], "player_id":[], "player_type":[], "x_coord":[], "y_coord":[], "play_id":[], "event":[], "event_type":[]}

# Loop over every game
for game in json_object:

    # Look for live data - if it doesnt exist, go to next game
    if 'liveData' not in game:
        continue
    
    # If it does exist, drill down into liveData>plays>allPlays
    plays = game['liveData']['plays']['allPlays']
    # Store the game_id
    game_id = game['gamePk']

    # There so many PLAYS... we want to loop through each play...
    for play in plays:
        
        #.. and search for the EVENTS we defined earlier (Shot & Goal)
        for event in event_types:

            # If the play contains one of the events...
            if play['result']['event'] in ['Shot']:
                # ... and if the event contains coordinates...
                        if 'x' in play['coordinates']:
                            # Save the coordinates to the growing list
                            raw_d["game_id"].append(game_id)
                            raw_d["player_id"].append(play['players'][0]['player']['id'])
                            raw_d["x_coord"].append(play['coordinates']['x'])
                            raw_d["y_coord"].append(play['coordinates']['y'])
                            raw_d["player_type"].append(play['players'][0]['player']['fullName'])
                            raw_d["play_id"].append(play['about']['player']['fullName'])
                            raw_d["event"].append(play['players'][0]['player']['fullName'])
                            raw_d["event_type"].append(play['players'][0]['player']['fullName'])

    

# Convert to pandas dataframe
df1 = pd.DataFrame.from_dict(raw_d)

df1.head(5)

Unnamed: 0,game_id,player_id,player_type,x_coord,y_coord
0,2021020001,8474564,Steven Stamkos,61.0,-32.0
1,2021020001,8474564,Steven Stamkos,61.0,-32.0
2,2021020001,8476934,Brock McGinn,-65.0,19.0
3,2021020001,8476934,Brock McGinn,-65.0,19.0
4,2021020001,8480172,Jan Rutta,-8.0,-27.0


In [285]:
# For each play resulting in a shot or goal, get the player name, event type, and the x y coordinates of the shot
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#

# Setup json object
json_object = json.loads(livefeed_json)

# Set up iterable objects
games = len(json_object)
event_types = ['Shot','Goal']
raw_d = {
        "game_id":[], 
        "player_id":[], 
        "player_name":[], 
        "player_type":[], 
        "play_id": [],
        "x_coord":[], 
        "y_coord":[], 
        "event":[],
        "event_type":[],
        "event_desc": [],
        "period": [],
        "period_time": []

        }

# Loop over every game
for game in json_object:

    # Look for live data - if it doesnt exist, go to next game
    if 'liveData' not in game:
        continue
    
    # If it does exist, drill down into liveData>plays>allPlays
    plays = game['liveData']['plays']['allPlays']
    # Store the game_id
    game_id = game['gamePk']

     # There so many PLAYS... we want to loop through each play...
    for play in plays:
        if 'players' in play:
            for player in play['players']:
                if player['playerType'] in ["Shooter", "Scorer"]:
                    player_type = player['playerType']
                    player_id = player['player']['id']
                    player_name = player['player']['fullName']

                #.. and search for the EVENTS we defined earlier (Shot & Goal)
                for event in event_types:

                    # If the play contains one of the events...
                    if play['result']['event'] in event_types:
                        # ... and if the event contains coordinates...
                                if 'x' in play['coordinates']:
                                    # Save the coordinates to the growing list
                                    raw_d["game_id"].append(game_id)
                                    raw_d["player_id"].append(player_id)
                                    raw_d["x_coord"].append(play['coordinates']['x'])
                                    raw_d["y_coord"].append(play['coordinates']['y'])
                                    raw_d["player_name"].append(player_name)
                                    raw_d["player_type"].append(player_type)
                                    raw_d["event"].append(play['result']['event'])
                                    raw_d["event_type"].append(play['result']['secondaryType'])
                                    raw_d["event_desc"].append(play['result']['description'])
                                    raw_d["period"].append(play['about']['period'])
                                    raw_d["period_time"].append(play['about']['periodTime'])
                                    raw_d["play_id"].append(play['about']['eventIdx'])


# This loop causes duplication (x4), not sure where...

# Convert to pandas dataframe
df1 = pd.DataFrame.from_dict(raw_d)

# Drop dups
df2 = df1.drop_duplicates()

# Add some features
df2['goal'] = np.where(df2['event']== 'Goal', 1, 0)
df2['league'] = 'NHL'

# See data
df2.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['goal'] = np.where(df2['event']== 'Goal', 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['league'] = 'NHL'


Unnamed: 0,game_id,player_id,player_name,player_type,play_id,x_coord,y_coord,event,event_type,event_desc,period,period_time,goal,league
0,2021020001,8474564,Steven Stamkos,Shooter,8,61.0,-32.0,Shot,Wrist Shot,Steven Stamkos Wrist Shot saved by Tristan Jarry,1,01:03,0,NHL
4,2021020001,8476934,Brock McGinn,Shooter,13,-65.0,19.0,Shot,Wrist Shot,Brock McGinn Wrist Shot saved by Andrei Vasile...,1,01:44,0,NHL
8,2021020001,8480172,Jan Rutta,Shooter,15,-8.0,-27.0,Shot,Wrist Shot,Jan Rutta Wrist Shot saved by Tristan Jarry,1,02:01,0,NHL
12,2021020001,8478046,Danton Heinen,Shooter,19,-60.0,-4.0,Shot,Wrist Shot,Danton Heinen Wrist Shot saved by Andrei Vasil...,1,02:47,0,NHL
16,2021020001,8476292,Ondrej Palat,Shooter,22,63.0,4.0,Shot,Wrist Shot,Ondrej Palat Wrist Shot saved by Tristan Jarry,1,03:07,0,NHL


## Analyzing shots & goals

In [296]:
# Get player summaries
player_shots = df2.groupby(['player_name', 'player_id', 'league']).agg({'goal':['count', 'sum']})
player_shots.columns = ["_".join(x) for x in player_shots.columns.ravel()]
player_shots = player_shots.rename(columns = {"goal_count": "sog", "goal_sum": "goals"})
player_shots = player_shots.reset_index()
player_shots['conversion'] = round(player_shots['goals'] / player_shots['sog'], 4)

# Get league summaries
league_shots = df2.groupby('league').agg({'goal':['count', 'sum']})
league_shots.columns = ["_".join(x) for x in league_shots.columns.ravel()]
league_shots = league_shots.rename(columns = {"goal_count": "sog", "goal_sum": "goals"})
league_shots = league_shots.reset_index()
league_shots['conversion'] = round(league_shots['goals'] / league_shots['sog'], 4)

# Join player summaries with league summaries
df3 = player_shots.merge(league_shots, on = "league", suffixes = ("_player", "_league"))
df3["conv_spread"] = df3["conversion_player"] - df3["conversion_league"]
df3.sort_values(by = "sog_player", ascending = False).head(10)

  player_shots.columns = ["_".join(x) for x in player_shots.columns.ravel()]
  league_shots.columns = ["_".join(x) for x in league_shots.columns.ravel()]


Unnamed: 0,player_name,player_id,league,sog_player,goals_player,conversion_player,sog_league,goals_league,conversion_league,conv_spread
208,William Nylander,8477939,NHL,11,2,0.1818,611,61,0.0998,0.082
139,Mitchell Marner,8478483,NHL,10,0,0.0,611,61,0.0998,-0.0998
150,Nikita Kucherov,8476453,NHL,8,1,0.125,611,61,0.0998,0.0252
200,Victor Hedman,8475167,NHL,8,0,0.0,611,61,0.0998,-0.0998
155,Ondrej Kase,8478131,NHL,8,0,0.0,611,61,0.0998,-0.0998
134,Max Pacioretty,8474157,NHL,8,2,0.25,611,61,0.0998,0.1502
100,John Tavares,8475166,NHL,8,0,0.0,611,61,0.0998,-0.0998
66,Erik Cernak,8478416,NHL,7,0,0.0,611,61,0.0998,-0.0998
156,Ondrej Palat,8476292,NHL,7,2,0.2857,611,61,0.0998,0.1859
196,Tyler Toffoli,8475726,NHL,7,0,0.0,611,61,0.0998,-0.0998


## Shot location analysis

What we are going to do first is set up a binning grid. I want to understand where shots are taken on the ice, but I do not want to see all individual shots. By taking spatial average we can make a more insightful and visual representation. The position data from the API extends from:

* X: -100 to 100 (meters)
* Y: -42.5 to 42.5 (meters)


For our binning we are using hex plots from matplotlib to extract the raw binning data and will use drawn rectangles (again matplotlib) for the final visuals.
To start we define our figure dimensions and grid size:

In [297]:
# To keep the aspect ration correct we use a square figure size
xbnds = np.array([-100.,100.0])
ybnds = np.array([-100,100])
extent = [xbnds[0],xbnds[1],ybnds[0],ybnds[1]]
# We are going to bin in 30 unit increments.  It is fun to play with this!  
gridsize= 30;mincnt=0

Next we will find the efficiency of the league at each location on the ice. To do this we call the hexbin method and extract the location vertex and count data.

**Once thing to note is that since the scorer never scores on their own net we must make sure the negative locations are flipped to always represent the attacking side.**

This is all because every period the player switch sides and the coordinate system is fixed.

In [None]:
# First concatenate the arrays for x and y league data
league_x_all_shots = league_data['Shot']['x'] 
   + league_data['Goal']['x'];
league_y_all_shots = league_data['Shot']['y'] 
   + league_data['Goal']['y']
# Perform the coordinate flipping!
league_x_all_shots_normalized = [];
league_y_all_shots_normalized = []
# Enumerate the list so we can use the index for y also
for i,s in enumerate(league_x_all_shots):
    if league_x_all_shots[i] <0:
        league_x_all_shots_normalized.append(-league_x_all_shots[i])
        league_y_all_shots_normalized.append(-league_y_all_shots[i])
    else:
        league_x_all_shots_normalized.append(league_x_all_shots[i])
        league_y_all_shots_normalized.append(league_y_all_shots[i])
        
# Do the same treatment for the goals
league_x_goal_normalized = [];
league_y_goal_normalized=[]
for i,s in enumerate(league_data['Goal']['x']):
    if league_data['Goal']['x'][i] <0:
       league_x_goal_normalized.append(-league_data['Goal']['x'][i])
       league_y_goal_normalized.append(-league_data['Goal']['y'][i])
    else:
       league_x_goal_normalized.append(league_data['Goal']['x'][i])
       league_y_goal_normalized.append(league_data['Goal']['y'][i])

dom - you are following this: https://towardsdatascience.com/nhl-analytics-with-python-6390c5d3206d

you need to convert your data to his format to enable the shot location analysis.

something like... all shots and goals x coords in one list, another for y coords.... then can begin