In [1]:
import json
import pandas as pd
import os

Create a function to convert all events of every game into a pandas dataframe.

For this milestone, you will want to include events of the type “shots” and “goals”. You can ignore missed shots or blocked shots for now. For each event, you will want to include as features (at minimum): game time/period information, game ID, team information (which team took the shot), indicator if its a shot or a goal, the on-ice coordinates, the shooter and goalie name (don’t worry about assists for now), shot type, if it was on an empty net, and whether or not a goal was at even strength, shorthanded, or on the power play.


In [144]:
import numpy as np
import pandas as pd

def tidy_one_game_data(raw_data: dict):
    # Early exit if 'liveData' is not present
    if 'liveData' not in raw_data:
        return None

    # Initialize DataFrame
    columns = ['gameID', 'eventID', 'gameID_eventID', 'game_period', 'game_time', 'team', 'goal', 'x', 'y', 'shooter', 'goalie', 'shotType', 'emptyNet', 'strength', 'gameType', 'home', 'away', 'season', 'game_seconds', 'last_event', 'last_x_coordinate', 'last_y_coordinate', 'time_from_last_event', 'distance_from_last_event', 'rebound', 'speed', 'power_play_time','time_since_powerplay_started'] #, 'nbFriendly_non_goalie_skaters', 'nbOpposing_non_goalie_skaters']
    # df = pd.DataFrame(columns=columns)
    single_play_data_list = []

    # Extracting basic game info
    game_info = raw_data['gameData']['game']
    gameID, gameType, season = raw_data['gamePk'], game_info['type'], game_info['season']
    teams_info = raw_data['gameData']['teams']
    home, away = teams_info['home']['name'], teams_info['away']['name']

    # Initialize variables for penalty tracking
    penalties = {'team1': {'minor': [], 'doubleMinor': [], 'major': [], 'reserved': [], 'nbPlayerDown': 0},
                 'team2': {'minor': [], 'doubleMinor': [], 'major': [], 'reserved': [], 'nbPlayerDown': 0}}
    team_names = {home: 'team1', away: 'team2'}
    power_play_time = 0

    for index, play in enumerate(raw_data['liveData']['plays']['allPlays']):
        # Process each play
        about, result = play['about'], play['result']
        period, period_time = about['period'], about['periodTime']
        game_play_time = (period - 1) * 20 * 60 + int(period_time[:2]) * 60 + int(period_time[3:])

        # Update penalties and power play status
        update_penalties(penalties, game_play_time)
        power_play_time = update_power_play(penalties, power_play_time, game_play_time)

        # Process 'Shot' or 'Goal' plays
        if result['event'] in ['Shot', 'Goal']:
            play_data = process_play(play, gameID, home, away, penalties, team_names, game_play_time, power_play_time, index, raw_data)
            single_play_data_list.append(play_data)
        # Process Penalties and update penalties dictionary, either by adding the penalty to the active list or reserving it if there are already two penalties (BONUS)
        elif result['event'] == 'Penalty':# and play['about']['eventIdx'] in raw_data['liveData']['plays']['penaltyPlays']:
            penalty_minutes = result['penaltyMinutes']
            penalty_time = game_play_time + penalty_minutes * 60
            team_key = team_names[play['team']['name']]

            # Add penalty based on the minutes
            if penalty_minutes < 4:
                penalty_type = 'minor'
            elif penalty_minutes == 4:
                penalty_type = 'doubleMinor'
            else:
                penalty_type = 'major'

            if penalties[team_key]['nbPlayerDown'] < 2:
                penalties[team_key][penalty_type].insert(0, penalty_time)
                penalties[team_key]['nbPlayerDown'] += 1
            else:
                reserved_code = {'minor': 0, 'doubleMinor': 1, 'major': 2}[penalty_type]
                penalties[team_key]['reserved'].insert(0, reserved_code)

    # Drop rows with missing coordinates and return DataFrame
    return pd.DataFrame(single_play_data_list).dropna(axis=0, subset=['x_coordinate', 'y_coordinate'])

def update_penalties(penalties, game_play_time):
    """
    This function updates the penalty status for each team. The logic involves removing finished penalties and serving reserved penalties.
    """
    for team in ['team1', 'team2']:
        for penalty_type in ['minor', 'doubleMinor', 'major']:
            # Remove finished penalties
            while penalties[team][penalty_type] and penalties[team][penalty_type][-1] <= game_play_time:
                penalties[team][penalty_type].pop()
                if penalty_type != 'doubleMinor':
                    penalties[team]['nbPlayerDown'] -= 1
                elif len(penalties[team]['doubleMinor']) % 2 == 0:
                    penalties[team]['nbPlayerDown'] -= 1

            # Serve reserved penalties
            while penalties[team]['reserved'] and penalties[team]['nbPlayerDown'] < 2:
                r = penalties[team]['reserved'].pop()
                penalty_duration = [2 * 60, 4 * 60, 5 * 60][r]  # Minor, Double Minor, Major
                for _ in range(1 if r != 1 else 2):  # Double minor adds two penalties
                    penalties[team][penalty_type].insert(0, game_play_time + penalty_duration)
                    penalties[team]['nbPlayerDown'] += 1

def update_power_play(penalties, power_play_time, game_play_time):
    """
    This function updates the power play status. It sets or resets the power_play_time based on the number of players down on each team.
    """
    if penalties['team1']['nbPlayerDown'] != penalties['team2']['nbPlayerDown'] and power_play_time == 0:
        power_play_time = game_play_time  # Start power play
    elif penalties['team1']['nbPlayerDown'] == penalties['team2']['nbPlayerDown'] and power_play_time != 0:
        power_play_time = 0  # Reset power play
    
    return power_play_time


def process_play(single_play, game_id, home, away, penalties, team_names, game_play_time, power_play_time, index, raw_data):

    event_type =  single_play['result']['event']
    team_playing = team_names[single_play['team']['name']]
    opposing_team = 'team2' if team_playing == 'team1' else 'team1'
    # about = single_play['about']
    event_data = {
        'event_type': event_type,
        # get the game ID
        'gameID': raw_data['gamePk'],
        # print(gameID)
        'gameType': raw_data['gameData']['game']['type'],
        # print(gameType)
        'home': raw_data['gameData']['teams']['home']['name'],
        # print(home)
        'away': raw_data['gameData']['teams']['away']['name'],
        # print(away)
        'season': raw_data['gameData']['game']['season']
    }

    # get the game evet id/time/period information
    event_id = single_play['about']['eventId']
    event_data["eventID"] = event_id
    event_data["gameID_eventID"] = f"{game_id}_{event_id}"
    event_data['game_time'] = single_play['about']['dateTime']
    event_data['game_period'] = single_play['about']['period']
    event_data['team'] = single_play['team']['name']

    # get the on-ice coordinates
    event_data['x_coordinate'] = single_play['coordinates'].get('x', None)
    event_data['y_coordinate'] = single_play['coordinates'].get('y', None)

    # get the short type
    event_data['shot_type'] = single_play['result'].get('secondaryType',None)

    if event_type == 'Shot':
        event_data['is_goal'] = False
        # Extracting shooter and goalie names
        for player in single_play['players']:
            if player['playerType'] == 'Shooter':
                event_data['shooter'] = player['player']['fullName']
            elif player['playerType'] == 'Goalie':
                event_data['goalie'] = player['player']['fullName']
    elif event_type == 'Goal':
        event_data['is_goal'] = True
        for player in single_play['players']:
            if player['playerType'] == 'Scorer':
                event_data['shooter'] = player['player']['fullName']
            if player['playerType'] == 'Goalie':
                event_data['goalie'] = player['player']['fullName']
        event_data['is_emptyNet'] = single_play['result'].get('emptyNet', None)
        event_data['strength'] = single_play['result'].get('strength',None).get('name', None)

        #releasing penalized players during a power-play when a goal is scored
        if penalties[team_playing]['nbPlayerDown'] != penalties[opposing_team]['nbPlayerDown']:
            release_penalized_players(penalties[opposing_team])

  
    # Additional processing common to both shots and goals
    # Game Seconds
    period_time = single_play['about']['periodTime']
    period = single_play['about']['period']
    event_data["game_seconds"] = (period-1)*20+(int(period_time.split(':')[0])*60)+int(period_time.split(':')[1])

    # Last event type
    event_data["time_from_last_event"] = None
    event_data["last_x_coordinate"] = None
    event_data["last_y_coordinate"] = None
    event_data["last_event"] = None

    if (index > 0) & (index<len(raw_data['liveData']['plays']['allPlays'])):
        previous_play = raw_data['liveData']['plays']['allPlays'][index - 1]
        event_data["last_event"] = previous_play['result']['event']
        # Coordinates of the last event (x, y)
        event_data["last_x_coordinate"] = previous_play['coordinates'].get('x', None)
        event_data["last_y_coordinate"] = previous_play['coordinates'].get('y', None)
        #Time from the last event (seconds)
        last_period_time = previous_play['about']['periodTime']
        last_period = previous_play['about']['period']
        previous_game_seconds = (last_period-1)*20+(int(last_period_time.split(':')[0])*60)+int(last_period_time.split(':')[1])
        event_data["time_from_last_event"] = event_data["game_seconds"] - previous_game_seconds
    
    # Calculate Distance from the last event
    if all(v is not None for v in [event_data["x_coordinate"], event_data["y_coordinate"], event_data["last_x_coordinate"], event_data["last_y_coordinate"]]):
        event_data["distance_from_last_event"] = np.linalg.norm(np.array([event_data["x_coordinate"], event_data["y_coordinate"]]) - np.array([event_data["last_x_coordinate"],event_data["last_y_coordinate"]]))      
    else:
        event_data["distance_from_last_event"] = None    
    
    # Rebound
    event_data["rebound"] = True if (event_data["last_event"] is not None and event_data["last_event"] == 'Shot' and event_type =='Shot') else False

    # Speed 
    if all(v is not None for v in [event_data["distance_from_last_event"], event_data["time_from_last_event"]]) and event_data["time_from_last_event"]!=0:
        event_data["speed"] = event_data["distance_from_last_event"]/event_data["time_from_last_event"]
    else:
        event_data["speed"] = None
        
    # Time elapsed since start of power-play
    event_data["power_play_time"] = power_play_time
    event_data["time_since_powerplay_started"] = game_play_time - power_play_time if power_play_time != 0 else 0

    # Calculate the number of skaters for each team
    friendly_non_goalie_skaters = 5 - penalties[team_playing]['nbPlayerDown']
    opposing_non_goalie_skaters = 5 - penalties[opposing_team]['nbPlayerDown']
    
    # Ensure that the number of skaters does not go below 3 as a team must have at least three skaters on the ice at all times in hockey
    event_data['num_friendly_non_goalie_skaters'] = max(friendly_non_goalie_skaters, 3)
    event_data['num_opposing_non_goalie_skaters'] = max(opposing_non_goalie_skaters, 3)
    
    # Return a dictionary with all the necessary data
    return event_data


def release_penalized_players(penalty_info):
    # Release the players from the minor or double minor penalties
    if penalty_info['minor']:
        penalty_info['minor'].pop()
        penalty_info['nbPlayerDown'] -= 1
    elif penalty_info['doubleMinor']:
        penalty_info['doubleMinor'].pop()
        if len(penalty_info['doubleMinor']) % 2 == 0:
            penalty_info['nbPlayerDown'] -= 1


In [123]:
def process_game_json(file_path):
    with open(file_path, 'r') as file:
        raw_data = json.load(file)
    return tidy_one_game_data(raw_data)

def concatenate_all_games_data(dataset_root_dir):
    all_games_data = []

    for root, dirs, files in os.walk(dataset_root_dir):
        print(f'Processing directory: {root}')  # Debugging information
        for file in files:
            if file.endswith(".json"):
                file_path = os.path.join(root, file)
                # print('Processing file: {file_path}')
                game_data_df = process_game_json(file_path)
                all_games_data.append(game_data_df)

    # Concatenate all individual game data DataFrames into a single DataFrame
    all_games_df = pd.concat(all_games_data, ignore_index=True)

    return all_games_df

# Usage:
dataset_root_dir = 'data/20202021/R'
test = concatenate_all_games_data(dataset_root_dir)
test

Processing directory: data/20202021/R


Unnamed: 0,event_type,gameID,gameType,home,away,season,eventID,gameID_eventID,game_time,game_period,team,x_coordinate,y_coordinate,shot_type,is_goal,shooter,goalie,game_seconds,time_from_last_event,last_x_coordinate,last_y_coordinate,last_event,distance_from_last_event,rebound,speed,power_play_time,time_since_powerplay_started,num_friendly_non_goalie_skaters,num_opposing_non_goalie_skaters,is_emptyNet,strength
0,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,11,2020020180_11,2021-03-05T00:10:40Z,1,New Jersey Devils,66.0,31.0,Snap Shot,False,Jesper Bratt,Igor Shesterkin,123,14,53.0,-34.0,Takeaway,66.287254,False,4.734804,0,0,5,5,,
1,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,19,2020020180_19,2021-03-05T00:14:59Z,1,New York Rangers,-35.0,-6.0,Wrist Shot,False,Filip Chytil,Mackenzie Blackwood,323,11,-64.0,30.0,Giveaway,46.227697,False,4.202518,0,0,5,5,,
2,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,20,2020020180_20,2021-03-05T00:16:31Z,1,New Jersey Devils,41.0,-27.0,Snap Shot,False,Sami Vatanen,Igor Shesterkin,413,17,-12.0,-37.0,Giveaway,53.935146,False,3.172656,0,0,5,5,,
3,Goal,2020020180,R,New Jersey Devils,New York Rangers,20202021,23,2020020180_23,2021-03-05T00:19:10Z,1,New Jersey Devils,61.0,5.0,Snap Shot,True,Jack Hughes,Igor Shesterkin,432,19,69.0,-22.0,Faceoff,28.160256,False,1.482119,0,0,5,5,False,Even
4,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,26,2020020180_26,2021-03-05T00:20:40Z,1,New Jersey Devils,36.0,16.0,Wrist Shot,False,Ty Smith,Igor Shesterkin,452,4,97.0,13.0,Takeaway,61.073726,False,15.268431,0,0,5,5,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52403,Shot,2020020681,R,Detroit Red Wings,Chicago Blackhawks,20202021,658,2020020681_658,2021-04-16T01:46:18Z,3,Chicago Blackhawks,66.0,28.0,Wrist Shot,False,Dominik Kubalik,Thomas Greiss,1011,33,62.0,0.0,Shot,28.284271,True,0.857099,0,0,5,5,,
52404,Shot,2020020681,R,Detroit Red Wings,Chicago Blackhawks,20202021,659,2020020681_659,2021-04-16T01:47:10Z,3,Chicago Blackhawks,66.0,-5.0,Snap Shot,False,Wyatt Kalynuk,Thomas Greiss,1063,1,60.0,33.0,Giveaway,38.470768,False,38.470768,0,0,5,5,,
52405,Goal,2020020681,R,Detroit Red Wings,Chicago Blackhawks,20202021,660,2020020681_660,2021-04-16T01:49:31Z,3,Detroit Red Wings,-56.0,-15.0,Wrist Shot,True,Michael Rasmussen,,1154,17,25.0,12.0,Giveaway,85.381497,False,5.022441,0,0,5,5,True,Even
52406,Shot,2020020681,R,Detroit Red Wings,Chicago Blackhawks,20202021,662,2020020681_662,2021-04-16T01:51:23Z,3,Chicago Blackhawks,60.0,-8.0,Wrist Shot,False,Wyatt Kalynuk,Thomas Greiss,1226,2,67.0,-7.0,Blocked Shot,7.071068,False,3.535534,0,0,5,5,,


In [124]:
test.num_friendly_non_goalie_skaters.value_counts()

num_friendly_non_goalie_skaters
5    49324
4     2855
3      229
Name: count, dtype: int64

In [145]:
def process_game_json(file_path):
    with open(file_path, 'r') as file:
        raw_data = json.load(file)
    return tidy_one_game_data(raw_data)

def concatenate_all_games_data(dataset_root_dir):
    all_games_data = []

    for root, dirs, files in os.walk(dataset_root_dir):
        print(f'Processing directory: {root}')  # Debugging information
        for file in files:
            if file.endswith(".json"):
                file_path = os.path.join(root, file)
                # print('Processing file: {file_path}')
                game_data_df = process_game_json(file_path)
                all_games_data.append(game_data_df)

    # Concatenate all individual game data DataFrames into a single DataFrame
    all_games_df = pd.concat(all_games_data, ignore_index=True)

    return all_games_df

# Usage:
dataset_root_dir = 'data'
all_games_df_2 = concatenate_all_games_data(dataset_root_dir)
all_games_df_2

Processing directory: data
Processing directory: data/20202021
Processing directory: data/20202021/R
Processing directory: data/20202021/P
Processing directory: data/20192020
Processing directory: data/20192020/R
Processing directory: data/20192020/P
Processing directory: data/20182019
Processing directory: data/20182019/R
Processing directory: data/20182019/P
Processing directory: data/20172018
Processing directory: data/20172018/R
Processing directory: data/20172018/P
Processing directory: data/20162017
Processing directory: data/20162017/R
Processing directory: data/20162017/P


Unnamed: 0,event_type,gameID,gameType,home,away,season,eventID,gameID_eventID,game_time,game_period,team,x_coordinate,y_coordinate,shot_type,is_goal,shooter,goalie,game_seconds,time_from_last_event,last_x_coordinate,last_y_coordinate,last_event,distance_from_last_event,rebound,speed,power_play_time,time_since_powerplay_started,num_friendly_non_goalie_skaters,num_opposing_non_goalie_skaters,is_emptyNet,strength
0,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,11,2020020180_11,2021-03-05T00:10:40Z,1,New Jersey Devils,66.0,31.0,Snap Shot,False,Jesper Bratt,Igor Shesterkin,123,14,53.0,-34.0,Takeaway,66.287254,False,4.734804,0,0,5,5,,
1,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,19,2020020180_19,2021-03-05T00:14:59Z,1,New York Rangers,-35.0,-6.0,Wrist Shot,False,Filip Chytil,Mackenzie Blackwood,323,11,-64.0,30.0,Giveaway,46.227697,False,4.202518,0,0,5,5,,
2,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,20,2020020180_20,2021-03-05T00:16:31Z,1,New Jersey Devils,41.0,-27.0,Snap Shot,False,Sami Vatanen,Igor Shesterkin,413,17,-12.0,-37.0,Giveaway,53.935146,False,3.172656,0,0,5,5,,
3,Goal,2020020180,R,New Jersey Devils,New York Rangers,20202021,23,2020020180_23,2021-03-05T00:19:10Z,1,New Jersey Devils,61.0,5.0,Snap Shot,True,Jack Hughes,Igor Shesterkin,432,19,69.0,-22.0,Faceoff,28.160256,False,1.482119,0,0,5,5,False,Even
4,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,26,2020020180_26,2021-03-05T00:20:40Z,1,New Jersey Devils,36.0,16.0,Wrist Shot,False,Ty Smith,Igor Shesterkin,452,4,97.0,13.0,Takeaway,61.073726,False,15.268431,0,0,5,5,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387807,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,859,2016030153_859,2017-04-18T05:01:24Z,4,Chicago Blackhawks,39.0,33.0,Slap Shot,False,Brent Seabrook,Pekka Rinne,786,6,55.0,-20.0,Blocked Shot,55.362442,False,9.227074,0,0,5,5,,
387808,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,861,2016030153_861,2017-04-18T05:03:14Z,4,Nashville Predators,-75.0,-5.0,Snap Shot,False,Kevin Fiala,Corey Crawford,896,102,-57.0,-28.0,Missed Shot,29.206164,False,0.286335,0,0,5,5,,
387809,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,864,2016030153_864,2017-04-18T05:05:35Z,4,Nashville Predators,-33.0,1.0,Wrist Shot,False,Ryan Ellis,Corey Crawford,967,2,-69.0,22.0,Faceoff,41.677332,False,20.838666,0,0,5,5,,
387810,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,867,2016030153_867,2017-04-18T05:06:20Z,4,Chicago Blackhawks,25.0,-20.0,Snap Shot,False,Marcus Kruger,Pekka Rinne,984,12,-44.0,11.0,Missed Shot,75.643903,False,6.303659,0,0,5,5,,


In [36]:
pd.set_option('display.max_columns', 500)

In [104]:
len(all_games_df)

387812

In [130]:
len(all_games_df_2)

387812

In [131]:
all_games_df_2.last_event.value_counts()

last_event
Faceoff               93385
Shot                  75149
Hit                   69512
Blocked Shot          41624
Giveaway              37453
Missed Shot           36219
Takeaway              26293
Stoppage               6566
Goal                    697
Penalty                 413
Period Start            395
Period End               58
Official Challenge       31
Period Ready              7
Shootout Complete         4
Game End                  2
Game Official             2
Period Official           2
Name: count, dtype: int64

In [119]:
all_games_df

Unnamed: 0,index,gameID_eventID,game_period,dateTime,gameID,team,goal,x,y,shooter,goalie,shotType,emptyNet,strength,gameType,home,away,season,last_event,last_x,last_y,game_seconds,time_from_last_event,distance_from_last_event,rebound,speed,time_since_powerplay_started,nbFriendly_non_goalie_skaters,nbOpposing_non_goalie_skaters,team_side,distanceNet_or_shotDistance,angleNet_or_shotAngleWithSign,shot_angle_absolute,change_in_shot_angle
0,0,2020020180_11,1,2021-03-05T00:10:40Z,2020020180,New Jersey Devils,0,66.0,31.0,Jesper Bratt,Igor Shesterkin,Snap Shot,0.0,,R,New Jersey Devils,New York Rangers,20202021,Takeaway,53.0,-34.0,123,14,66.287254,False,4.734804,0,5,5,left,38.600518,53.426969,53.426969,0.0
1,1,2020020180_19,1,2021-03-05T00:14:59Z,2020020180,New York Rangers,0,-35.0,-6.0,Filip Chytil,Mackenzie Blackwood,Wrist Shot,0.0,,R,New Jersey Devils,New York Rangers,20202021,Giveaway,-64.0,30.0,323,11,46.227697,False,4.202518,0,5,5,right,54.332311,6.340192,6.340192,0.0
2,2,2020020180_20,1,2021-03-05T00:16:31Z,2020020180,New Jersey Devils,0,41.0,-27.0,Sami Vatanen,Igor Shesterkin,Snap Shot,0.0,,R,New Jersey Devils,New York Rangers,20202021,Giveaway,-12.0,-37.0,413,17,53.935146,False,3.172656,0,5,5,left,55.072679,-29.357754,29.357754,0.0
3,3,2020020180_23,1,2021-03-05T00:19:10Z,2020020180,New Jersey Devils,1,61.0,5.0,Jack Hughes,Igor Shesterkin,Snap Shot,0.0,Even,R,New Jersey Devils,New York Rangers,20202021,Faceoff,69.0,-22.0,432,19,28.160256,False,1.482119,0,5,5,left,28.442925,10.124672,10.124672,0.0
4,4,2020020180_26,1,2021-03-05T00:20:40Z,2020020180,New Jersey Devils,0,36.0,16.0,Ty Smith,Igor Shesterkin,Wrist Shot,0.0,,R,New Jersey Devils,New York Rangers,20202021,Takeaway,97.0,13.0,452,4,61.073726,False,15.268431,0,5,5,left,55.362442,16.798372,16.798372,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387807,80,2016030153_859,4,2017-04-18T05:01:24Z,2016030153,Chicago Blackhawks,0,39.0,33.0,Brent Seabrook,Pekka Rinne,Slap Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Blocked Shot,55.0,-20.0,786,6,55.362442,False,9.227074,0,5,5,left,59.908263,33.424811,33.424811,0.0
387808,81,2016030153_861,4,2017-04-18T05:03:14Z,2016030153,Nashville Predators,0,-75.0,-5.0,Kevin Fiala,Corey Crawford,Snap Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Missed Shot,-57.0,-28.0,896,102,29.206164,False,0.286335,0,5,5,right,14.866069,19.653824,19.653824,0.0
387809,82,2016030153_864,4,2017-04-18T05:05:35Z,2016030153,Nashville Predators,0,-33.0,1.0,Ryan Ellis,Corey Crawford,Wrist Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Faceoff,-69.0,22.0,967,2,41.677332,False,20.838666,0,5,5,right,56.008928,-1.023030,1.023030,0.0
387810,83,2016030153_867,4,2017-04-18T05:06:20Z,2016030153,Chicago Blackhawks,0,25.0,-20.0,Marcus Kruger,Pekka Rinne,Snap Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Missed Shot,-44.0,11.0,984,12,75.643903,False,6.303659,0,5,5,left,67.052218,-17.354025,17.354025,0.0


In [136]:
all_games_df_2.time_since_powerplay_started.value_counts()

time_since_powerplay_started
0      323779
8         804
7         721
6         715
10        715
        ...  
692         1
537         1
672         1
670         1
593         1
Name: count, Length: 604, dtype: int64

In [137]:
all_games_df.time_since_powerplay_started.value_counts()

time_since_powerplay_started
0       265606
8          696
7          656
6          628
10         618
         ...  
1048         1
1067         1
1072         1
1015         1
1169         1
Name: count, Length: 2228, dtype: int64

In [44]:
all_games_df.time_since_powerplay_started.value_counts()

time_since_powerplay_started
0       265606
8          696
7          656
6          628
10         618
         ...  
1048         1
1067         1
1072         1
1015         1
1169         1
Name: count, Length: 2228, dtype: int64

In [146]:
old_method_data = all_games_df[all_games_df.time_since_powerplay_started!=0]
old_method_data

Unnamed: 0,index,gameID_eventID,game_period,dateTime,gameID,team,goal,x,y,shooter,goalie,shotType,emptyNet,strength,gameType,home,away,season,last_event,last_x,last_y,game_seconds,time_from_last_event,distance_from_last_event,rebound,speed,time_since_powerplay_started,nbFriendly_non_goalie_skaters,nbOpposing_non_goalie_skaters,team_side,distanceNet_or_shotDistance,angleNet_or_shotAngleWithSign,shot_angle_absolute,change_in_shot_angle
15,15,2020020180_261,1,2021-03-05T00:40:10Z,2020020180,New Jersey Devils,0,83.0,-10.0,Andreas Johnsson,Igor Shesterkin,Wrist Shot,0.0,,R,New Jersey Devils,New York Rangers,20202021,Hit,-34.0,-37.0,973,13,120.074977,False,9.236537,42,5,4,left,11.661904,-59.036243,59.036243,0.0
23,23,2020020180_297,2,2021-03-05T01:16:15Z,2020020180,New York Rangers,1,74.0,11.0,Chris Kreider,Mackenzie Blackwood,Wrist Shot,0.0,Power Play,R,New Jersey Devils,New York Rangers,20202021,Faceoff,69.0,-22.0,528,36,33.376639,False,0.927129,36,5,4,left,18.601075,36.253838,36.253838,0.0
24,24,2020020180_298,2,2021-03-05T01:17:12Z,2020020180,New York Rangers,0,64.0,16.0,Brett Howden,Mackenzie Blackwood,Backhand,0.0,,R,New Jersey Devils,New York Rangers,20202021,Faceoff,0.0,0.0,544,16,65.969690,False,4.123106,52,5,4,left,29.681644,32.619243,32.619243,0.0
33,33,2020020180_417,2,2021-03-05T01:31:06Z,2020020180,New Jersey Devils,0,-79.0,8.0,Kyle Palmieri,Igor Shesterkin,Tip-In,0.0,,R,New Jersey Devils,New York Rangers,20202021,Missed Shot,-77.0,-26.0,962,23,34.058773,False,1.480816,45,5,4,right,12.806248,-38.659808,38.659808,0.0
44,44,2020020180_457,3,2021-03-05T02:13:30Z,2020020180,New Jersey Devils,0,62.0,-15.0,Pavel Zacha,Igor Shesterkin,Wrist Shot,0.0,,R,New Jersey Devils,New York Rangers,20202021,Takeaway,54.0,-36.0,550,32,22.472205,False,0.702256,88,4,3,left,30.886890,-29.054604,29.054604,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387740,13,2016030153_40,1,2017-04-18T02:05:49Z,2016030153,Nashville Predators,0,36.0,-3.0,Roman Josi,Corey Crawford,Slap Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Faceoff,69.0,-22.0,551,29,38.078866,False,1.313064,49,5,4,left,53.084838,-3.239700,3.239700,0.0
387761,34,2016030153_402,2,2017-04-18T02:51:17Z,2016030153,Chicago Blackhawks,0,75.0,-34.0,Patrick Kane,Pekka Rinne,Wrist Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Faceoff,69.0,-22.0,262,32,13.416408,False,0.419263,32,5,4,left,36.769553,-67.619865,67.619865,0.0
387765,38,2016030153_418,2,2017-04-18T03:03:05Z,2016030153,Nashville Predators,0,-56.0,-36.0,Austin Watson,Corey Crawford,Wrist Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Faceoff,69.0,22.0,658,22,137.800581,False,6.263663,22,4,5,right,48.836462,47.489553,47.489553,0.0
387766,39,2016030153_422,2,2017-04-18T03:04:17Z,2016030153,Chicago Blackhawks,1,73.0,-23.0,Patrick Kane,Pekka Rinne,Wrist Shot,0.0,Power Play,P,Nashville Predators,Chicago Blackhawks,20162017,Missed Shot,62.0,-27.0,695,17,11.704700,False,0.688512,59,5,4,left,28.017851,-55.175511,55.175511,0.0


In [147]:
new_method = all_games_df_2[all_games_df_2.time_since_powerplay_started!=0]
new_method[~new_method.gameID_eventID.isin(old_method_data.gameID_eventID.tolist())]

Unnamed: 0,event_type,gameID,gameType,home,away,season,eventID,gameID_eventID,game_time,game_period,team,x_coordinate,y_coordinate,shot_type,is_goal,shooter,goalie,game_seconds,time_from_last_event,last_x_coordinate,last_y_coordinate,last_event,distance_from_last_event,rebound,speed,power_play_time,time_since_powerplay_started,num_friendly_non_goalie_skaters,num_opposing_non_goalie_skaters,is_emptyNet,strength
93,Shot,2020020495,R,Los Angeles Kings,Vegas Golden Knights,20202021,398,2020020495_398,2021-03-21T23:37:42Z,2,Vegas Golden Knights,-69.0,-21.0,Wrist Shot,False,Reilly Smith,Calvin Petersen,1043,22,-30.0,-19.0,Takeaway,39.051248,False,1.775057,2182,41,5,4,,
94,Shot,2020020495,R,Los Angeles Kings,Vegas Golden Knights,20202021,400,2020020495_400,2021-03-21T23:38:16Z,2,Vegas Golden Knights,-36.0,1.0,Wrist Shot,False,Shea Theodore,Calvin Petersen,1051,7,-69.0,-22.0,Faceoff,40.224371,False,5.746339,2182,49,5,4,,
136,Shot,2020020246,R,Detroit Red Wings,Chicago Blackhawks,20202021,120,2020020246_120,2021-02-16T00:54:18Z,1,Detroit Red Wings,40.0,30.0,Wrist Shot,False,Frans Nielsen,Malcolm Subban,621,7,52.0,4.0,Blocked Shot,28.635642,False,4.090806,522,99,4,5,,
165,Shot,2020020246,R,Detroit Red Wings,Chicago Blackhawks,20202021,430,2020020246_430,2021-02-16T02:26:21Z,3,Chicago Blackhawks,75.0,27.0,Wrist Shot,False,Mattias Janmark,Thomas Greiss,213,11,-69.0,-22.0,Faceoff,152.108514,False,13.828047,2562,11,4,5,,
166,Shot,2020020246,R,Detroit Red Wings,Chicago Blackhawks,20202021,431,2020020246_431,2021-02-16T02:28:16Z,3,Detroit Red Wings,-54.0,-38.0,Wrist Shot,False,Mathias Brome,Malcolm Subban,268,33,-69.0,22.0,Faceoff,61.846584,False,1.874139,2562,66,5,4,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387642,Shot,2016030112,P,Montréal Canadiens,New York Rangers,20162017,580,2016030112_580,2017-04-15T01:07:49Z,3,Montréal Canadiens,-85.0,-25.0,Wrist Shot,False,Andrei Markov,Henrik Lundqvist,104,7,-69.0,22.0,Faceoff,49.648766,False,7.092681,2457,7,5,4,,
387643,Shot,2016030112,P,Montréal Canadiens,New York Rangers,20162017,582,2016030112_582,2017-04-15T01:08:43Z,3,Montréal Canadiens,-61.0,-4.0,Backhand,False,Andrew Shaw,Henrik Lundqvist,136,27,-69.0,22.0,Faceoff,27.202941,False,1.007516,2457,39,5,4,,
387644,Shot,2016030112,P,Montréal Canadiens,New York Rangers,20162017,583,2016030112_583,2017-04-15T01:09:15Z,3,Montréal Canadiens,-54.0,-1.0,Wrist Shot,False,Artturi Lehkonen,Henrik Lundqvist,169,33,-61.0,-4.0,Shot,7.615773,True,0.230781,2457,72,5,4,,
387645,Shot,2016030112,P,Montréal Canadiens,New York Rangers,20162017,584,2016030112_584,2017-04-15T01:09:45Z,3,New York Rangers,67.0,29.0,Wrist Shot,False,Rick Nash,Carey Price,198,26,-96.0,-6.0,Hit,166.715326,False,6.412128,2457,101,4,5,,


In [148]:
new_method[new_method.gameID_eventID.isin(old_method_data.gameID_eventID.tolist())]

Unnamed: 0,event_type,gameID,gameType,home,away,season,eventID,gameID_eventID,game_time,game_period,team,x_coordinate,y_coordinate,shot_type,is_goal,shooter,goalie,game_seconds,time_from_last_event,last_x_coordinate,last_y_coordinate,last_event,distance_from_last_event,rebound,speed,power_play_time,time_since_powerplay_started,num_friendly_non_goalie_skaters,num_opposing_non_goalie_skaters,is_emptyNet,strength
15,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,261,2020020180_261,2021-03-05T00:40:10Z,1,New Jersey Devils,83.0,-10.0,Wrist Shot,False,Andreas Johnsson,Igor Shesterkin,973,13,-34.0,-37.0,Hit,120.074977,False,9.236537,931,42,5,4,,
23,Goal,2020020180,R,New Jersey Devils,New York Rangers,20202021,297,2020020180_297,2021-03-05T01:16:15Z,2,New York Rangers,74.0,11.0,Wrist Shot,True,Chris Kreider,Mackenzie Blackwood,528,36,69.0,-22.0,Faceoff,33.376639,False,0.927129,1672,36,5,5,False,Power Play
33,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,417,2020020180_417,2021-03-05T01:31:06Z,2,New Jersey Devils,-79.0,8.0,Tip-In,False,Kyle Palmieri,Igor Shesterkin,962,23,-77.0,-26.0,Missed Shot,34.058773,False,1.480816,2097,45,5,4,,
44,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,457,2020020180_457,2021-03-05T02:13:30Z,3,New Jersey Devils,62.0,-15.0,Wrist Shot,False,Pavel Zacha,Igor Shesterkin,550,32,54.0,-36.0,Takeaway,22.472205,False,0.702256,2822,88,4,3,,
47,Shot,2020020180,R,New Jersey Devils,New York Rangers,20202021,467,2020020180_467,2021-03-05T02:19:41Z,3,New Jersey Devils,74.0,6.0,Tip-In,False,Janne Kuokkanen,Igor Shesterkin,787,26,57.0,10.0,Blocked Shot,17.464249,False,0.671702,3034,113,5,4,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387740,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,40,2016030153_40,2017-04-18T02:05:49Z,1,Nashville Predators,36.0,-3.0,Slap Shot,False,Roman Josi,Corey Crawford,551,29,69.0,-22.0,Faceoff,38.078866,False,1.313064,502,49,5,4,,
387761,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,402,2016030153_402,2017-04-18T02:51:17Z,2,Chicago Blackhawks,75.0,-34.0,Wrist Shot,False,Patrick Kane,Pekka Rinne,262,32,69.0,-22.0,Faceoff,13.416408,False,0.419263,1410,32,5,4,,
387765,Shot,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,418,2016030153_418,2017-04-18T03:03:05Z,2,Nashville Predators,-56.0,-36.0,Wrist Shot,False,Austin Watson,Corey Crawford,658,22,69.0,22.0,Faceoff,137.800581,False,6.263663,1816,22,4,5,,
387766,Goal,2016030153,P,Nashville Predators,Chicago Blackhawks,20162017,422,2016030153_422,2017-04-18T03:04:17Z,2,Chicago Blackhawks,73.0,-23.0,Wrist Shot,True,Patrick Kane,Pekka Rinne,695,17,62.0,-27.0,Missed Shot,11.704700,False,0.688512,1816,59,5,5,False,Power Play


In [149]:
old_method_data[old_method_data.gameID_eventID.isin(new_method.gameID_eventID.tolist())]

Unnamed: 0,index,gameID_eventID,game_period,dateTime,gameID,team,goal,x,y,shooter,goalie,shotType,emptyNet,strength,gameType,home,away,season,last_event,last_x,last_y,game_seconds,time_from_last_event,distance_from_last_event,rebound,speed,time_since_powerplay_started,nbFriendly_non_goalie_skaters,nbOpposing_non_goalie_skaters,team_side,distanceNet_or_shotDistance,angleNet_or_shotAngleWithSign,shot_angle_absolute,change_in_shot_angle
15,15,2020020180_261,1,2021-03-05T00:40:10Z,2020020180,New Jersey Devils,0,83.0,-10.0,Andreas Johnsson,Igor Shesterkin,Wrist Shot,0.0,,R,New Jersey Devils,New York Rangers,20202021,Hit,-34.0,-37.0,973,13,120.074977,False,9.236537,42,5,4,left,11.661904,-59.036243,59.036243,0.0
23,23,2020020180_297,2,2021-03-05T01:16:15Z,2020020180,New York Rangers,1,74.0,11.0,Chris Kreider,Mackenzie Blackwood,Wrist Shot,0.0,Power Play,R,New Jersey Devils,New York Rangers,20202021,Faceoff,69.0,-22.0,528,36,33.376639,False,0.927129,36,5,4,left,18.601075,36.253838,36.253838,0.0
33,33,2020020180_417,2,2021-03-05T01:31:06Z,2020020180,New Jersey Devils,0,-79.0,8.0,Kyle Palmieri,Igor Shesterkin,Tip-In,0.0,,R,New Jersey Devils,New York Rangers,20202021,Missed Shot,-77.0,-26.0,962,23,34.058773,False,1.480816,45,5,4,right,12.806248,-38.659808,38.659808,0.0
44,44,2020020180_457,3,2021-03-05T02:13:30Z,2020020180,New Jersey Devils,0,62.0,-15.0,Pavel Zacha,Igor Shesterkin,Wrist Shot,0.0,,R,New Jersey Devils,New York Rangers,20202021,Takeaway,54.0,-36.0,550,32,22.472205,False,0.702256,88,4,3,left,30.886890,-29.054604,29.054604,0.0
47,47,2020020180_467,3,2021-03-05T02:19:41Z,2020020180,New Jersey Devils,0,74.0,6.0,Janne Kuokkanen,Igor Shesterkin,Tip-In,0.0,,R,New Jersey Devils,New York Rangers,20202021,Blocked Shot,57.0,10.0,787,26,17.464249,False,0.671702,113,5,4,left,16.155494,21.801409,21.801409,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387740,13,2016030153_40,1,2017-04-18T02:05:49Z,2016030153,Nashville Predators,0,36.0,-3.0,Roman Josi,Corey Crawford,Slap Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Faceoff,69.0,-22.0,551,29,38.078866,False,1.313064,49,5,4,left,53.084838,-3.239700,3.239700,0.0
387761,34,2016030153_402,2,2017-04-18T02:51:17Z,2016030153,Chicago Blackhawks,0,75.0,-34.0,Patrick Kane,Pekka Rinne,Wrist Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Faceoff,69.0,-22.0,262,32,13.416408,False,0.419263,32,5,4,left,36.769553,-67.619865,67.619865,0.0
387765,38,2016030153_418,2,2017-04-18T03:03:05Z,2016030153,Nashville Predators,0,-56.0,-36.0,Austin Watson,Corey Crawford,Wrist Shot,0.0,,P,Nashville Predators,Chicago Blackhawks,20162017,Faceoff,69.0,22.0,658,22,137.800581,False,6.263663,22,4,5,right,48.836462,47.489553,47.489553,0.0
387766,39,2016030153_422,2,2017-04-18T03:04:17Z,2016030153,Chicago Blackhawks,1,73.0,-23.0,Patrick Kane,Pekka Rinne,Wrist Shot,0.0,Power Play,P,Nashville Predators,Chicago Blackhawks,20162017,Missed Shot,62.0,-27.0,695,17,11.704700,False,0.688512,59,5,4,left,28.017851,-55.175511,55.175511,0.0


In [None]:
all_games_df_2[all_games_df_2.time_since_powerplay_started!=0]

In [142]:
df_tidy[df_tidy.gameID_eventID.isin(["2017021065_13", "2017021065_14"])]

Unnamed: 0,index,gameID_eventID,game_period,dateTime,gameID,team,goal,x,y,shooter,goalie,shotType,emptyNet,strength,gameType,home,away,season,last_event,last_x,last_y,game_seconds,time_from_last_event,distance_from_last_event,rebound,speed,time_since_powerplay_started,nbFriendly_non_goalie_skaters,nbOpposing_non_goalie_skaters,team_side,distanceNet_or_shotDistance,angleNet_or_shotAngleWithSign,shot_angle_absolute,change_in_shot_angle
0,0,2017021065_13,1,2018-03-12T23:11:06Z,2017021065,Washington Capitals,0,-50.0,36.0,Michal Kempny,Connor Hellebuyck,Snap Shot,0,,R,Washington Capitals,Winnipeg Jets,20172018,Hit,72.0,37.0,111,11,122.004098,False,11.091282,0,5,5,right,53.075418,-42.70939,42.70939,0.0
1,1,2017021065_14,1,2018-03-12T23:11:11Z,2017021065,Washington Capitals,0,-85.0,-25.0,John Carlson,Connor Hellebuyck,Wrist Shot,0,,R,Washington Capitals,Winnipeg Jets,20172018,Shot,-50.0,36.0,115,4,70.327804,True,17.581951,0,5,5,right,25.317978,80.909723,80.909723,56.380887


In [143]:
all_games_df_2[all_games_df_2.gameID_eventID.isin(["2017021065_13", "2017021065_14"])]

Unnamed: 0,event_type,gameID,gameType,home,away,season,eventID,gameID_eventID,game_time,game_period,team,x_coordinate,y_coordinate,shot_type,is_goal,shooter,goalie,game_seconds,time_from_last_event,last_x_coordinate,last_y_coordinate,last_event,distance_from_last_event,rebound,speed,power_play_time,time_since_powerplay_started,num_friendly_non_goalie_skaters,num_opposing_non_goalie_skaters,is_emptyNet,strength
294310,Shot,2017021065,R,Washington Capitals,Winnipeg Jets,20172018,13,2017021065_13,2018-03-12T23:11:06Z,1,Washington Capitals,-50.0,36.0,Snap Shot,False,Michal Kempny,Connor Hellebuyck,111,11,72.0,37.0,Hit,122.004098,False,11.091282,0,0,5,5,,
294311,Shot,2017021065,R,Washington Capitals,Winnipeg Jets,20172018,14,2017021065_14,2018-03-12T23:11:11Z,1,Washington Capitals,-85.0,-25.0,Wrist Shot,False,John Carlson,Connor Hellebuyck,115,4,-50.0,36.0,Shot,70.327804,True,17.581951,0,0,5,5,,
