### Install Things

In [1]:
!pip install pandasql --quiet

### Import Things

In [36]:
import numpy as np
import pandas as pd
import pandasql as psql
import warnings
import math
from pandas.errors import SettingWithCopyWarning

pd.set_option("display.max_columns", None)
warnings.simplefilter(action = "ignore", category = SettingWithCopyWarning)

### Some Functions

In [3]:
# Keep only shots and goals function
def keep_SOG(df):
    df = df[(df["typeDescKey"] == "shot-on-goal") | (df["typeDescKey"] == "goal")]

    return(df)

In [4]:
# Fix pbp data types
def fix_pbp_types(pbp_df):
    pbp_df["situationCode"] = pbp_df["situationCode"].astype(int).astype(str).str.pad(width = 4, side = "left", fillchar = "0")
    pbp_df["gameId"] = pbp_df["gameId"].astype(str)
    pbp_df["periodDescriptor.number"] = pbp_df["periodDescriptor.number"].astype(int)
    pbp_df["details.eventOwnerTeamId"] = pbp_df["details.eventOwnerTeamId"].astype(int).astype(str)

    return(pbp_df)

In [5]:
# Fix schedule data types
def fix_schedule_types(schedule_df):
    schedule_df["id"] = schedule_df["id"].astype(str)
    schedule_df["awayTeam.id"] = schedule_df["awayTeam.id"].astype(str)
    schedule_df["homeTeam.id"] = schedule_df["homeTeam.id"].astype(str)

    return(schedule_df)

In [23]:
# Determine which direction the shot is being taken
def shot_side(row):
    if row["homeTeamShot"] == True:
        if row["homeTeamDefendingSide"] == "left":
            return "right"
        else:
            return "left"
    else:
        return row["homeTeamDefendingSide"]

In [37]:
# Determine shot distance
def get_shot_distance(row):
    return math.hypot(89 - row["adj.xCoord"], 0 - row["adj.yCoord"])

In [96]:
# Determine shot distance class
def get_shot_distance_class(df, model):
    # Create shot distance buckets
    if model == "krzy05":
        conditions = [
            df["shotDistance"] < 10,
            (df["shotDistance"] >= 10) & (df["shotDistance"] < 13),
            (df["shotDistance"] >= 13) & (df["shotDistance"] < 15),
            (df["shotDistance"] >= 15) & (df["shotDistance"] < 17),
            (df["shotDistance"] >= 17) & (df["shotDistance"] < 23),
            (df["shotDistance"] >= 23) & (df["shotDistance"] < 32),
            (df["shotDistance"] >= 32) & (df["shotDistance"] < 37),
            (df["shotDistance"] >= 37) & (df["shotDistance"] < 39),
            (df["shotDistance"] >= 39) & (df["shotDistance"] < 45),
            (df["shotDistance"] >= 45) & (df["shotDistance"] < 58),
            df["shotDistance"] >= 58
        ]

        choices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

        df["shotDistanceClass"] = np.select(conditions, choices, default = None)
        return(df)

In [104]:
# Get Strength at time of event
def get_strength(row):
    if row["homeTeamShot"] == True:
        if row["home_players"] > row["away_players"]:
            return "PP"
        elif row["home_players"] < row["away_players"]:
            return "SH"
        else:
            return "EV"
    elif row["homeTeamShot"] == False:
        if row["home_players"] > row["away_players"]:
            return "SH"
        elif row["home_players"] < row["away_players"]:
            return "PP"
        else:
            return "EV"

### Get Data

In [6]:
skaters = pd.read_csv("skaters.csv")
goalies = pd.read_csv("goalies.csv")
pbp = pd.read_csv("pbp.csv")
schedule = pd.read_csv("schedule.csv")
pbp.head()

  pbp = pd.read_csv("pbp.csv")


Unnamed: 0,eventId,timeInPeriod,timeRemaining,situationCode,homeTeamDefendingSide,typeCode,typeDescKey,sortOrder,periodDescriptor.number,periodDescriptor.periodType,periodDescriptor.maxRegulationPeriods,details.eventOwnerTeamId,details.losingPlayerId,details.winningPlayerId,details.xCoord,details.yCoord,details.zoneCode,details.hittingPlayerId,details.hitteePlayerId,details.shotType,details.shootingPlayerId,details.goalieInNetId,details.awaySOG,details.homeSOG,details.reason,details.typeCode,details.descKey,details.duration,details.committedByPlayerId,details.drawnByPlayerId,details.blockingPlayerId,details.playerId,details.secondaryReason,details.scoringPlayerId,details.scoringPlayerTotal,details.assist1PlayerId,details.assist1PlayerTotal,details.assist2PlayerId,details.assist2PlayerTotal,details.awayScore,details.homeScore,details.discreteClip,details.servedByPlayerId,gameId,periodDescriptor.otPeriods,pptReplayUrl,details.highlightClipSharingUrl,details.highlightClipSharingUrlFr,details.highlightClip,details.highlightClipFr,details.discreteClipFr
0,51.0,00:00,20:00,1551.0,right,520.0,period-start,8.0,1.0,REG,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021010001,,,,,,,
1,53.0,00:00,20:00,1551.0,right,502.0,faceoff,9.0,1.0,REG,3.0,10.0,8480018.0,8475166.0,0.0,0.0,N,,,,,,,,,,,,,,,,,,,,,,,,,,,2021010001,,,,,,,
2,101.0,00:15,19:45,1551.0,right,503.0,hit,10.0,1.0,REG,3.0,10.0,,,-96.0,26.0,O,8475166.0,8481014.0,,,,,,,,,,,,,,,,,,,,,,,,,2021010001,,,,,,,
3,54.0,00:17,19:43,1551.0,right,506.0,shot-on-goal,11.0,1.0,REG,3.0,10.0,,,-49.0,7.0,O,,,snap,8480043.0,8480051.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,2021010001,,,,,,,
4,55.0,00:27,19:33,1551.0,right,507.0,missed-shot,12.0,1.0,REG,3.0,10.0,,,-31.0,39.0,O,,,wrist,8480043.0,8480051.0,,,wide-of-net,,,,,,,,,,,,,,,,,,,2021010001,,,,,,,


In [7]:
# Fix universal dataset data types
schedule = fix_schedule_types(schedule)

### Split Play-By-Play Into Train (2021-22, 2022-23, 2023-24) and Test (2024-25), Additionally Filter for Regular Season Only

In [8]:
# Set "gameId" to a string
pbp["gameId"] = pbp["gameId"].apply(str)

# Split into training and testing events
train_pbp = pbp.loc[pbp.gameId.str.startswith(("2021", "2022", "2023"), na=False)]
test_pbp = pbp.loc[pbp.gameId.str.startswith("2024", na=False)]
print("Training events: ", len(train_pbp))
print("Testing events: ", len(test_pbp))

Training events:  1404747
Testing events:  479352


In [9]:
# Keep only regular season games (remove pre-season and playoffs)
train_pbp = train_pbp.loc[train_pbp["gameId"].str[5] == "2"]
test_pbp = test_pbp.loc[test_pbp["gameId"].str[5] == "2"]
print("Training events: ", len(train_pbp))
print("Testing events: ", len(test_pbp))

Training events:  1227064
Testing events:  420073


In [77]:
# Create timeInPeriodSeconds
train_pbp["timeInPeriodSeconds"] = pd.to_timedelta("00:" + train_pbp["timeInPeriod"]).dt.total_seconds().astype(int)
test_pbp["timeInPeriodSeconds"] = pd.to_timedelta("00:" + test_pbp["timeInPeriod"]).dt.total_seconds().astype(int)

In [108]:
train_pbp

Unnamed: 0,eventId,timeInPeriod,timeRemaining,situationCode,homeTeamDefendingSide,typeCode,typeDescKey,sortOrder,periodDescriptor.number,periodDescriptor.periodType,periodDescriptor.maxRegulationPeriods,details.eventOwnerTeamId,details.losingPlayerId,details.winningPlayerId,details.xCoord,details.yCoord,details.zoneCode,details.hittingPlayerId,details.hitteePlayerId,details.shotType,details.shootingPlayerId,details.goalieInNetId,details.awaySOG,details.homeSOG,details.reason,details.typeCode,details.descKey,details.duration,details.committedByPlayerId,details.drawnByPlayerId,details.blockingPlayerId,details.playerId,details.secondaryReason,details.scoringPlayerId,details.scoringPlayerTotal,details.assist1PlayerId,details.assist1PlayerTotal,details.assist2PlayerId,details.assist2PlayerTotal,details.awayScore,details.homeScore,details.discreteClip,details.servedByPlayerId,gameId,periodDescriptor.otPeriods,pptReplayUrl,details.highlightClipSharingUrl,details.highlightClipSharingUrlFr,details.highlightClip,details.highlightClipFr,details.discreteClipFr,lastEvent,timeInPeriodSeconds,timeInPeriodShifted,timeSinceLastEvent
1806,8.0,00:00,20:00,1551.0,right,520.0,period-start,6.0,1.0,REG,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2021020003,,,,,,,,,0,,
1807,9.0,00:00,20:00,1551.0,right,502.0,faceoff,9.0,1.0,REG,3.0,8.0,8475166.0,8477989.0,0.0,0.0,N,,,,,,,,,,,,,,,,,,,,,,,,,,,2021020003,,,,,,,,period-start,0,0.0,0.0
1808,51.0,00:11,19:49,1551.0,right,503.0,hit,10.0,1.0,REG,3.0,10.0,,,-95.0,-16.0,O,8477941.0,8477989.0,,,,,,,,,,,,,,,,,,,,,,,,,2021020003,,,,,,,,faceoff,11,0.0,11.0
1809,10.0,00:15,19:45,1551.0,right,506.0,shot-on-goal,11.0,1.0,REG,3.0,8.0,,,71.0,13.0,O,,,slap,8476967.0,8475789.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,2021020003,,,,,,,,hit,15,11.0,4.0
1810,11.0,00:18,19:42,1551.0,right,507.0,missed-shot,12.0,1.0,REG,3.0,8.0,,,66.0,5.0,O,,,wrist,8476981.0,8475789.0,,,wide-of-net,,,,,,,,,,,,,,,,,,,2021020003,,,,,,,,shot-on-goal,18,15.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1404742,1157.0,19:18,00:42,1551.0,left,508.0,blocked-shot,776.0,3.0,REG,3.0,55.0,,,81.0,-14.0,D,,,,8474586.0,,,,blocked,,,,,,8481806.0,,,,,,,,,,,,,2023020743,,,,,,,,blocked-shot,1158,1130.0,28.0
1404743,339.0,19:20,00:40,1551.0,left,516.0,stoppage,777.0,3.0,REG,3.0,,,,,,,,,,,,,,goalie-stopped-after-sog,,,,,,,,,,,,,,,,,,,2023020743,,,,,,,,blocked-shot,1160,1158.0,2.0
1404744,397.0,19:20,00:40,1551.0,left,502.0,faceoff,780.0,3.0,REG,3.0,16.0,8476913.0,8477450.0,69.0,-22.0,D,,,,,,,,,,,,,,,,,,,,,,,,,,,2023020743,,,,,,,,stoppage,1160,1160.0,0.0
1404745,340.0,20:00,00:00,1551.0,left,521.0,period-end,781.0,3.0,REG,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023020743,,,,,,,,faceoff,1200,1160.0,40.0


In [109]:
# Create a column to denote strength

# Breakup code 
train_pbp["home_goalie"] = train_pbp["situationCode"].str[0].astype(int)
train_pbp["home_skaters"] = train_pbp["situationCode"].str[1].astype(int)
train_pbp["away_goalie"] = train_pbp["situationCode"].str[2].astype(int)
train_pbp["away_skaters"] = train_pbp["situationCode"].str[3].astype(int)

test_pbp["home_goalie"] = test_pbp["situationCode"].str[0].astype(int)
test_pbp["home_skaters"] = test_pbp["situationCode"].str[1].astype(int)
test_pbp["away_goalie"] = test_pbp["situationCode"].str[2].astype(int)
test_pbp["away_skaters"] = test_pbp["situationCode"].str[3].astype(int)

# Calculate players on ice for each time
train_pbp["home_players"] = train_pbp["home_goalie"] + train_pbp["home_skaters"]
train_pbp["away_players"] = train_pbp["away_goalie"] + train_pbp["away_skaters"]

test_pbp["home_players"] = test_pbp["home_goalie"] + test_pbp["home_skaters"]
test_pbp["away_players"] = test_pbp["away_goalie"] + test_pbp["away_skaters"]

train_pbp["Situation"] = train_pbp.apply(get_strength, axis = 1) 
test_pbp["Situation"] = test_pbp.apply(get_strength, axis = 1) 

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [80]:
# Create a two new two new columns to help define rebounds (lastEvent & timeSinceLastEvent)
train_pbp["lastEvent"] = train_pbp.groupby("gameId")["typeDescKey"].shift(1)
test_pbp["lastEvent"] = test_pbp.groupby("gameId")["typeDescKey"].shift(1)

train_pbp["timeInPeriodShifted"] = train_pbp.groupby("gameId")["timeInPeriodSeconds"].shift(1)
test_pbp["timeInPeriodShifted"] = test_pbp.groupby("gameId")["timeInPeriodSeconds"].shift(1)

train_pbp["timeSinceLastEvent"] = train_pbp["timeInPeriodSeconds"] - train_pbp["timeInPeriodShifted"]
test_pbp["timeSinceLastEvent"] = test_pbp["timeInPeriodSeconds"] - test_pbp["timeInPeriodShifted"]

### [Krzywicki (2005)](https://www.hockeyanalytics.com/Research_files/Shot_Quality_Krzywicki.pdf)

In [10]:
# Variables
# - Distance (<10ft, 10ft-12ft, 13ft-14ft, 15ft-16ft, 17ft-22ft, 23ft-31ft, 32ft-36ft, 37ft-38ft
#             39ft-44ft, 45ft-57ft, >=58ft)
# - Shot Type (Wrap, Slap, Wrist, Snap, Backhand, Tip)
# - Rebound (Yes, No)
# - Situation (Even Strength, Shorthanded, Powerplay)

In [81]:
# Keep only shot and goal events
train_pbp_krzy05 = keep_SOG(train_pbp)
test_pbp_krzy05 = keep_SOG(test_pbp)
print("Training shots: ", len(train_pbp_krzy05))
print("Testing shots: ", len(test_pbp_krzy05))

Training shots:  246134
Testing shots:  74641


In [82]:
# Fix data types
train_pbp_krzy05 = fix_pbp_types(train_pbp_krzy05)
test_pbp_krzy05 = fix_pbp_types(test_pbp_krzy05)

In [83]:
# Reduce columns to those neccesary for the model
keep_pbp = ["timeInPeriod", "timeRemaining", "situationCode", "typeDescKey", "periodDescriptor.number", "details.eventOwnerTeamId", "details.xCoord", "details.yCoord", "details.shotType", "gameId", "homeTeamDefendingSide", "lastEvent", "timeSinceLastEvent"]
keep_schedule = ["id", "awayTeam.id", "homeTeam.id"]
train_pbp_krzy05 = pd.merge(train_pbp_krzy05[keep_pbp],schedule[keep_schedule], left_on = "gameId", right_on = "id", how = "inner")
test_pbp_krzy05 = pd.merge(test_pbp_krzy05[keep_pbp],schedule[keep_schedule], left_on = "gameId", right_on = "id", how = "inner")
train_pbp_krzy05.head()

Unnamed: 0,timeInPeriod,timeRemaining,situationCode,typeDescKey,periodDescriptor.number,details.eventOwnerTeamId,details.xCoord,details.yCoord,details.shotType,gameId,homeTeamDefendingSide,lastEvent,timeSinceLastEvent,id,awayTeam.id,homeTeam.id
0,00:15,19:45,1551,shot-on-goal,1,8,71.0,13.0,slap,2021020003,right,hit,4.0,2021020003,8,10
1,00:18,19:42,1551,shot-on-goal,1,8,78.0,-22.0,wrist,2021020003,right,missed-shot,0.0,2021020003,8,10
2,01:16,18:44,1551,shot-on-goal,1,10,-60.0,-18.0,wrist,2021020003,right,hit,26.0,2021020003,8,10
3,01:23,18:37,1551,shot-on-goal,1,8,64.0,-16.0,wrist,2021020003,right,shot-on-goal,7.0,2021020003,8,10
4,02:50,17:10,1551,shot-on-goal,1,8,31.0,20.0,slap,2021020003,right,blocked-shot,2.0,2021020003,8,10


In [84]:
# Drop rows with details.shotType (38 train, 24 test), this is the only column containing NaN
train_pbp_krzy05 = train_pbp_krzy05[train_pbp_krzy05["details.shotType"].notna()]
test_pbp_krzy05 = test_pbp_krzy05[test_pbp_krzy05["details.shotType"].notna()]
print("Training shots: ", len(train_pbp_krzy05))
print("Testing shots: ", len(test_pbp_krzy05))

Training shots:  246096
Testing shots:  74617


In [85]:
#Create an abs.xCoord column, so shots are in one direction
train_pbp_krzy05["abs.xCoord"] = train_pbp_krzy05["details.xCoord"].abs()
test_pbp_krzy05["abs.xCoord"] = test_pbp_krzy05["details.xCoord"].abs()

# Determine whether home team took the shot
train_pbp_krzy05["homeTeamShot"] = train_pbp_krzy05["details.eventOwnerTeamId"] == train_pbp_krzy05["homeTeam.id"]
test_pbp_krzy05["homeTeamShot"] = test_pbp_krzy05["details.eventOwnerTeamId"] == test_pbp_krzy05["homeTeam.id"]

# Determine which direction the shot was taken
train_pbp_krzy05["shotSide"] = train_pbp_krzy05.apply(shot_side, axis = 1)
test_pbp_krzy05["shotSide"] = train_pbp_krzy05.apply(shot_side, axis = 1)

# Adjust shots taken towards the left net
train_pbp_krzy05["adj.xCoord"] = train_pbp_krzy05.apply(lambda row: abs(row["details.xCoord"]) if row["shotSide"] == "left" else row["details.xCoord"], axis = 1)
train_pbp_krzy05["adj.yCoord"] = train_pbp_krzy05.apply(lambda row: abs(row["details.yCoord"]) if row["shotSide"] == "left" else row["details.yCoord"], axis = 1)

test_pbp_krzy05["adj.xCoord"] = test_pbp_krzy05.apply(lambda row: abs(row["details.xCoord"]) if row["shotSide"] == "left" else row["details.xCoord"], axis = 1)
test_pbp_krzy05["adj.yCoord"] = test_pbp_krzy05.apply(lambda row: abs(row["details.yCoord"]) if row["shotSide"] == "left" else row["details.yCoord"], axis = 1)

train_pbp_krzy05.head()

Unnamed: 0,timeInPeriod,timeRemaining,situationCode,typeDescKey,periodDescriptor.number,details.eventOwnerTeamId,details.xCoord,details.yCoord,details.shotType,gameId,homeTeamDefendingSide,lastEvent,timeSinceLastEvent,id,awayTeam.id,homeTeam.id,abs.xCoord,homeTeamShot,shotSide,adj.xCoord,adj.yCoord
0,00:15,19:45,1551,shot-on-goal,1,8,71.0,13.0,slap,2021020003,right,hit,4.0,2021020003,8,10,71.0,False,right,71.0,13.0
1,00:18,19:42,1551,shot-on-goal,1,8,78.0,-22.0,wrist,2021020003,right,missed-shot,0.0,2021020003,8,10,78.0,False,right,78.0,-22.0
2,01:16,18:44,1551,shot-on-goal,1,10,-60.0,-18.0,wrist,2021020003,right,hit,26.0,2021020003,8,10,60.0,True,left,60.0,18.0
3,01:23,18:37,1551,shot-on-goal,1,8,64.0,-16.0,wrist,2021020003,right,shot-on-goal,7.0,2021020003,8,10,64.0,False,right,64.0,-16.0
4,02:50,17:10,1551,shot-on-goal,1,8,31.0,20.0,slap,2021020003,right,blocked-shot,2.0,2021020003,8,10,31.0,False,right,31.0,20.0


In [95]:
# Create shotDistance column
train_pbp_krzy05 = get_shot_distance_class(train_pbp_krzy05, "krzy05")
test_pbp_krzy05 = get_shot_distance_class(test_pbp_krzy05, "krzy05")

train_pbp_krzy05.head()

Unnamed: 0,timeInPeriod,timeRemaining,situationCode,typeDescKey,periodDescriptor.number,details.eventOwnerTeamId,details.xCoord,details.yCoord,details.shotType,gameId,homeTeamDefendingSide,lastEvent,timeSinceLastEvent,id,awayTeam.id,homeTeam.id,abs.xCoord,homeTeamShot,shotSide,adj.xCoord,adj.yCoord,shotDistance,shotDistanceClass,Rebound
0,00:15,19:45,1551,shot-on-goal,1,8,71.0,13.0,slap,2021020003,right,hit,4.0,2021020003,8,10,71.0,False,right,71.0,13.0,22.203603,4,False
1,00:18,19:42,1551,shot-on-goal,1,8,78.0,-22.0,wrist,2021020003,right,missed-shot,0.0,2021020003,8,10,78.0,False,right,78.0,-22.0,24.596748,5,False
2,01:16,18:44,1551,shot-on-goal,1,10,-60.0,-18.0,wrist,2021020003,right,hit,26.0,2021020003,8,10,60.0,True,left,60.0,18.0,34.132096,6,False
3,01:23,18:37,1551,shot-on-goal,1,8,64.0,-16.0,wrist,2021020003,right,shot-on-goal,7.0,2021020003,8,10,64.0,False,right,64.0,-16.0,29.681644,5,False
4,02:50,17:10,1551,shot-on-goal,1,8,31.0,20.0,slap,2021020003,right,blocked-shot,2.0,2021020003,8,10,31.0,False,right,31.0,20.0,61.351447,10,False


In [97]:
# Create rebound column as defined by Ryder (2004)
train_pbp_krzy05["Rebound"] = ((train_pbp_krzy05["lastEvent"] == "shot-on-goal") & (train_pbp_krzy05["timeSinceLastEvent"] >=0) & (train_pbp_krzy05["timeSinceLastEvent"] <= 2))
test_pbp_krzy05["Rebound"] = ((test_pbp_krzy05["lastEvent"] == "shot-on-goal") & (test_pbp_krzy05["timeSinceLastEvent"] >=0) & (test_pbp_krzy05["timeSinceLastEvent"] <= 2))
train_pbp_krzy05

Unnamed: 0,timeInPeriod,timeRemaining,situationCode,typeDescKey,periodDescriptor.number,details.eventOwnerTeamId,details.xCoord,details.yCoord,details.shotType,gameId,homeTeamDefendingSide,lastEvent,timeSinceLastEvent,id,awayTeam.id,homeTeam.id,abs.xCoord,homeTeamShot,shotSide,adj.xCoord,adj.yCoord,shotDistance,shotDistanceClass,Rebound
0,00:15,19:45,1551,shot-on-goal,1,8,71.0,13.0,slap,2021020003,right,hit,4.0,2021020003,8,10,71.0,False,right,71.0,13.0,22.203603,4,False
1,00:18,19:42,1551,shot-on-goal,1,8,78.0,-22.0,wrist,2021020003,right,missed-shot,0.0,2021020003,8,10,78.0,False,right,78.0,-22.0,24.596748,5,False
2,01:16,18:44,1551,shot-on-goal,1,10,-60.0,-18.0,wrist,2021020003,right,hit,26.0,2021020003,8,10,60.0,True,left,60.0,18.0,34.132096,6,False
3,01:23,18:37,1551,shot-on-goal,1,8,64.0,-16.0,wrist,2021020003,right,shot-on-goal,7.0,2021020003,8,10,64.0,False,right,64.0,-16.0,29.681644,5,False
4,02:50,17:10,1551,shot-on-goal,1,8,31.0,20.0,slap,2021020003,right,blocked-shot,2.0,2021020003,8,10,31.0,False,right,31.0,20.0,61.351447,10,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246129,10:59,09:01,1551,shot-on-goal,3,55,73.0,-17.0,snap,2023020743,left,blocked-shot,8.0,2023020743,16,55,73.0,True,right,73.0,-17.0,23.345235,5,False
246130,12:20,07:40,1551,shot-on-goal,3,55,40.0,-40.0,wrist,2023020743,left,takeaway,2.0,2023020743,16,55,40.0,True,right,40.0,-40.0,63.253458,10,False
246131,13:19,06:41,1551,shot-on-goal,3,16,-69.0,22.0,wrist,2023020743,left,blocked-shot,23.0,2023020743,16,55,69.0,False,left,69.0,22.0,29.732137,5,False
246132,16:20,03:40,1551,shot-on-goal,3,55,44.0,-39.0,wrist,2023020743,left,faceoff,21.0,2023020743,16,55,44.0,True,right,44.0,-39.0,59.548300,10,False


In [103]:
# Create a column to denote strength

# Breakup code 
train_pbp_krzy05["home_goalie"] = train_pbp_krzy05["situationCode"].str[0].astype(int)
train_pbp_krzy05["home_skaters"] = train_pbp_krzy05["situationCode"].str[1].astype(int)
train_pbp_krzy05["away_goalie"] = train_pbp_krzy05["situationCode"].str[2].astype(int)
train_pbp_krzy05["away_skaters"] = train_pbp_krzy05["situationCode"].str[3].astype(int)

# Calculate players on ice for each time
train_pbp_krzy05["home_players"] = train_pbp_krzy05["home_goalie"] + train_pbp_krzy05["home_skaters"]
train_pbp_krzy05["away_players"] = train_pbp_krzy05["away_goalie"] + train_pbp_krzy05["away_skaters"]

train_pbp_krzy05["Situation"] = train_pbp_krzy05.apply(get_strength, axis = 1) 

train_pbp_krzy05

Unnamed: 0,timeInPeriod,timeRemaining,situationCode,typeDescKey,periodDescriptor.number,details.eventOwnerTeamId,details.xCoord,details.yCoord,details.shotType,gameId,homeTeamDefendingSide,lastEvent,timeSinceLastEvent,id,awayTeam.id,homeTeam.id,abs.xCoord,homeTeamShot,shotSide,adj.xCoord,adj.yCoord,shotDistance,shotDistanceClass,Rebound,home_goalie,home_skaters,away_goalie,away_skaters,home_players,away_players,Situation
0,00:15,19:45,1551,shot-on-goal,1,8,71.0,13.0,slap,2021020003,right,hit,4.0,2021020003,8,10,71.0,False,right,71.0,13.0,22.203603,4,False,1,5,5,1,6,6,EV
1,00:18,19:42,1551,shot-on-goal,1,8,78.0,-22.0,wrist,2021020003,right,missed-shot,0.0,2021020003,8,10,78.0,False,right,78.0,-22.0,24.596748,5,False,1,5,5,1,6,6,EV
2,01:16,18:44,1551,shot-on-goal,1,10,-60.0,-18.0,wrist,2021020003,right,hit,26.0,2021020003,8,10,60.0,True,left,60.0,18.0,34.132096,6,False,1,5,5,1,6,6,EV
3,01:23,18:37,1551,shot-on-goal,1,8,64.0,-16.0,wrist,2021020003,right,shot-on-goal,7.0,2021020003,8,10,64.0,False,right,64.0,-16.0,29.681644,5,False,1,5,5,1,6,6,EV
4,02:50,17:10,1551,shot-on-goal,1,8,31.0,20.0,slap,2021020003,right,blocked-shot,2.0,2021020003,8,10,31.0,False,right,31.0,20.0,61.351447,10,False,1,5,5,1,6,6,EV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246129,10:59,09:01,1551,shot-on-goal,3,55,73.0,-17.0,snap,2023020743,left,blocked-shot,8.0,2023020743,16,55,73.0,True,right,73.0,-17.0,23.345235,5,False,1,5,5,1,6,6,EV
246130,12:20,07:40,1551,shot-on-goal,3,55,40.0,-40.0,wrist,2023020743,left,takeaway,2.0,2023020743,16,55,40.0,True,right,40.0,-40.0,63.253458,10,False,1,5,5,1,6,6,EV
246131,13:19,06:41,1551,shot-on-goal,3,16,-69.0,22.0,wrist,2023020743,left,blocked-shot,23.0,2023020743,16,55,69.0,False,left,69.0,22.0,29.732137,5,False,1,5,5,1,6,6,EV
246132,16:20,03:40,1551,shot-on-goal,3,55,44.0,-39.0,wrist,2023020743,left,faceoff,21.0,2023020743,16,55,44.0,True,right,44.0,-39.0,59.548300,10,False,1,5,5,1,6,6,EV


In [60]:
pbp[pbp["typeDescKey"] == "missed-shot"].groupby(["details.reason"]).count()

Unnamed: 0_level_0,eventId,timeInPeriod,timeRemaining,situationCode,homeTeamDefendingSide,typeCode,typeDescKey,sortOrder,periodDescriptor.number,periodDescriptor.periodType,periodDescriptor.maxRegulationPeriods,details.eventOwnerTeamId,details.losingPlayerId,details.winningPlayerId,details.xCoord,details.yCoord,details.zoneCode,details.hittingPlayerId,details.hitteePlayerId,details.shotType,details.shootingPlayerId,details.goalieInNetId,details.awaySOG,details.homeSOG,details.typeCode,details.descKey,details.duration,details.committedByPlayerId,details.drawnByPlayerId,details.blockingPlayerId,details.playerId,details.secondaryReason,details.scoringPlayerId,details.scoringPlayerTotal,details.assist1PlayerId,details.assist1PlayerTotal,details.assist2PlayerId,details.assist2PlayerTotal,details.awayScore,details.homeScore,details.discreteClip,details.servedByPlayerId,gameId,periodDescriptor.otPeriods,pptReplayUrl,details.highlightClipSharingUrl,details.highlightClipSharingUrlFr,details.highlightClip,details.highlightClipFr,details.discreteClipFr
details.reason,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
above-crossbar,5403,5403,5403,5403,5403,5403,5403,5403,5403,5403,5403,5403,0,0,5403,5403,5403,0,0,5403,5403,5393,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5403,0,0,0,0,0,0,0
failed-bank-attempt,471,471,471,471,471,471,471,471,471,471,471,471,0,0,471,471,471,0,0,471,471,471,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,471,0,0,0,0,0,0,0
goalpost,3712,3712,3712,3712,3712,3712,3712,3712,3712,3712,3712,3712,0,0,3712,3712,3712,0,0,3712,3712,3622,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3712,4,0,0,0,0,0,0
high-and-wide-left,3444,3444,3444,3444,3444,3444,3444,3444,3444,3444,3444,3444,0,0,3444,3444,3444,0,0,3444,3444,3439,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3444,0,0,0,0,0,0,0
high-and-wide-right,3987,3987,3987,3987,3987,3987,3987,3987,3987,3987,3987,3987,0,0,3987,3987,3987,0,0,3987,3987,3982,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3987,0,0,0,0,0,0,0
hit-crossbar,2463,2463,2463,2463,2463,2463,2463,2463,2463,2463,2463,2463,0,0,2463,2463,2463,0,0,2463,2463,2456,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2463,1,0,0,0,0,0,0
hit-left-post,2266,2266,2266,2266,2266,2266,2266,2266,2266,2266,2266,2266,0,0,2266,2266,2266,0,0,2266,2266,2212,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2266,1,0,0,0,0,0,0
hit-right-post,2369,2369,2369,2369,2369,2369,2369,2369,2369,2369,2369,2369,0,0,2369,2369,2369,0,0,2369,2369,2329,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2369,1,0,0,0,0,0,0
over-net,6801,6801,6801,6801,6801,6801,6801,6801,6801,6801,6801,6801,0,0,6801,6801,6801,0,0,6801,6801,6791,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6801,6,0,0,0,0,0,0
short,2369,2369,2369,2369,2369,2369,2369,2369,2369,2369,2369,2369,0,0,2369,2369,2369,0,0,2369,2369,2360,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2369,1,0,0,0,0,0,0
