In [1]:
import json
import pandas as pd
import os

Create a function to convert all events of every game into a pandas dataframe.

For this milestone, you will want to include events of the type “shots” and “goals”. You can ignore missed shots or blocked shots for now. For each event, you will want to include as features (at minimum): game time/period information, game ID, team information (which team took the shot), indicator if its a shot or a goal, the on-ice coordinates, the shooter and goalie name (don’t worry about assists for now), shot type, if it was on an empty net, and whether or not a goal was at even strength, shorthanded, or on the power play.


In [3]:
def convert_single_play_data(raw_data):
    single_play_data_list = []

    # print(season)
    for single_play in raw_data['liveData']['plays']['allPlays']:
      event_type =  single_play['result']['event']
      event_data = {
          'event_type': event_type,
          # get the game ID
          'gameID': raw_data['gamePk'],
          # print(gameID)
          'gameType': raw_data['gameData']['game']['type'],
          # print(gameType)
          'home': raw_data['gameData']['teams']['home']['name'],
          # print(home)
          'away': raw_data['gameData']['teams']['away']['name'],
          # print(away)
          'season': raw_data['gameData']['game']['season']
      }
      if event_type in ['Shot', 'Goal']:
        # get the game time/period information
        event_data['game_time'] = single_play['about']['dateTime']
        event_data['game_period'] = single_play['about']['period']
        event_data['team'] = single_play['team']['name']

        # get the on-ice coordinates
        event_data['x_coordinate'] = single_play['coordinates'].get('x', None),
        event_data['y_coordinate'] = single_play['coordinates'].get('y', None),

        # get the short type
        event_data['shot_type'] = single_play['result'].get('secondaryType',None)

        if event_type == 'Shot':
          event_data['is_goal'] = False
          # Extracting shooter and goalie names
          for player in single_play['players']:
            if player['playerType'] == 'Shooter':
              event_data['shooter'] = player['player']['fullName']
            elif player['playerType'] == 'Goalie':
              event_data['goalie'] = player['player']['fullName']

        elif event_type == 'Goal':
          event_data['is_goal'] = True
          for player in single_play['players']:
              if player['playerType'] == 'Scorer':
                event_data['shooter'] = player['player']['fullName']
              if player['playerType'] == 'Goalie':
                event_data['goalie'] = player['player']['fullName']
          event_data['is_emptyNet'] = single_play['result'].get('emptyNet', None)
          event_data['strength'] = single_play['result'].get('strength',None).get('name', None)

        single_play_data_list.append(event_data)

    # Converting the list of event data into a Pandas DataFrame
    single_play_df = pd.DataFrame(single_play_data_list)
    return single_play_df

In [10]:
def process_game_json(file_path):
    with open(file_path, 'r') as file:
        raw_data = json.load(file)
    return convert_single_play_data(raw_data)

def concatenate_all_games_data(dataset_root_dir):
    all_games_data = []

    for root, dirs, files in os.walk(dataset_root_dir):
        print(f'Processing directory: {root}')  # Debugging information
        for file in files:
            if file.endswith(".json"):
                file_path = os.path.join(root, file)
                # print('Processing file: {file_path}')
                game_data_df = process_game_json(file_path)
                all_games_data.append(game_data_df)

    # Concatenate all individual game data DataFrames into a single DataFrame
    all_games_df = pd.concat(all_games_data, ignore_index=True)

    return all_games_df

# Usage:
dataset_root_dir = 'Data'
all_games_df = concatenate_all_games_data(dataset_root_dir)
all_games_df

Processing directory: Data
Processing directory: Data/20202021
Processing directory: Data/20202021/R
Processing directory: Data/20202021/P
Processing directory: Data/20192020
Processing directory: Data/20192020/R
Processing directory: Data/20192020/P
Processing directory: Data/20182019
Processing directory: Data/20182019/R
Processing directory: Data/20182019/P
Processing directory: Data/20172018
Processing directory: Data/20172018/R
Processing directory: Data/20172018/P
Processing directory: Data/20162017
Processing directory: Data/20162017/R
Processing directory: Data/20162017/P


Unnamed: 0,event_type,gameID,gameType,home,away,season,game_time,game_period,team,x_coordinate,y_coordinate,shot_type,is_goal,shooter,goalie,is_emptyNet,strength
0,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,2021-03-05T00:10:40Z,1,New Jersey Devils,"(66.0,)","(31.0,)",Snap Shot,False,Jesper Bratt,Igor Shesterkin,,
1,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,2021-03-05T00:14:59Z,1,New York Rangers,"(-35.0,)","(-6.0,)",Wrist Shot,False,Filip Chytil,Mackenzie Blackwood,,
2,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,2021-03-05T00:16:31Z,1,New Jersey Devils,"(41.0,)","(-27.0,)",Snap Shot,False,Sami Vatanen,Igor Shesterkin,,
3,Goal,2020020180,R,New Jersey Devils,New York Rangers,20202021,2021-03-05T00:19:10Z,1,New Jersey Devils,"(61.0,)","(5.0,)",Snap Shot,True,Jack Hughes,Igor Shesterkin,False,Even
4,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,2021-03-05T00:20:40Z,1,New Jersey Devils,"(36.0,)","(16.0,)",Wrist Shot,False,Ty Smith,Igor Shesterkin,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387824,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,2017-04-18T05:01:24Z,4,Chicago Blackhawks,"(39.0,)","(33.0,)",Slap Shot,False,Brent Seabrook,Pekka Rinne,,
387825,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,2017-04-18T05:03:14Z,4,Nashville Predators,"(-75.0,)","(-5.0,)",Snap Shot,False,Kevin Fiala,Corey Crawford,,
387826,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,2017-04-18T05:05:35Z,4,Nashville Predators,"(-33.0,)","(1.0,)",Wrist Shot,False,Ryan Ellis,Corey Crawford,,
387827,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,2017-04-18T05:06:20Z,4,Chicago Blackhawks,"(25.0,)","(-20.0,)",Snap Shot,False,Marcus Kruger,Pekka Rinne,,
