# Expected Goals Model
Re-use pre-trained model

In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras



In [2]:
def open_base_xgoals_model():
    return keras.models.load_model('base_learner_v3')

In [3]:
base_learner = open_base_xgoals_model()

2024-05-01 23:15:21.187783: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-01 23:15:21.188256: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M2


In [54]:
def transform_shot_data(df):
    df = df[df['period'] != 5] # filter shootout attempts
    
    # shot type col was renamed when migrating to api_v2
    df.rename({'shot_type': 'shotType'}, axis=1, inplace=True)


    # find total seconds between shots (periodTime)
    df['timeSeconds'] = df['periodTime'].str[:2].astype(int) * 60 + df['periodTime'].str[3:].astype(int)
    df['timeSeconds'] = np.where(df['period'] == 2, df['timeSeconds'] + 1200, 
                            np.where(df['period'] == 3, df['timeSeconds'] + 2400, 
                                np.where(df['period'] == 4, df['timeSeconds'] + 3600, df['timeSeconds'])))
    df = df.sort_values(by=['gameId', 'teamId', 'timeSeconds'])
    df['timeSecondsDiff'] = df.groupby(['gameId', 'teamId'])['timeSeconds'].diff(1).fillna(df['timeSeconds'])

    # get total shots
    df['totalShots'] = 1
    df['totalShots'] = df.groupby(['gameId', 'teamId'])['totalShots'].cumsum()

    # fix all shots to one side of ice (adjust x and y coordinates)
    df.loc[df['x'] < 0, 'y'] = df['y'] * -1
    df['x'] = df['x'].abs()
    df = df[df['x'].notna()]

    # calculate angle of shot compared to goal
    x_goal = 89
    df['shotAngle'] = np.where(df['x'] != x_goal,
                                # when shot is from behind the net
                                np.where(df['x'] > x_goal,
                                        np.where(df['y'] >= 0,
                                                round(90 + (90 - np.arctan(df['y'] / (df['x'] - x_goal)) * (180 / np.pi)), 2),
                                                round(-90 - (90 + np.arctan(df['y'] / (df['x'] - x_goal)) * (180 / np.pi)), 2)
                                                ),
                                        # when shot is in front of net
                                        round(np.arctan(df['y'] / (x_goal - df['x'])) * (180 / np.pi), 2)
                                        ),
                                # when shot is taken on the goal line
                                np.where(df['y'] >= 0, 90, -90)
                            )

    # calculate difference in shot angle
    df['shotAnglePrev'] = df.groupby(['gameId', 'teamId'])['shotAngle'].shift(1).fillna(0)
    df['shotAngleDiff'] = np.absolute(np.where(df['shotAngle'] < df['shotAnglePrev'],
                                                    df['shotAngle'] - df['shotAnglePrev'],
                                                    df['shotAnglePrev'] - df['shotAngle']
                                                )
                                    )

    # determine if goalie moved to his right since last shot to attempt to make save
    df['goalieMoveRight'] = np.where(df['shotAngle'] > df['shotAnglePrev'], True, False)

    # calculate shot distance to goal
    df['shotDist'] = round(np.sqrt(np.square(df['x'] - x_goal) + np.square(df['y'])), 2)
    df['shotDistPrev'] = df.groupby(['gameId', 'teamId'])['shotDist'].shift(1).fillna(0)
    df['shotDistDiff'] = df['shotDist'] - df['shotDistPrev']

    # interaction between shotDistDiff and shotAngleDiff
    df['shotAngleDiffXshotDistDiff'] = df['shotAngleDiff'] * df['shotDistDiff']

    # generate dummy vars for shot_type
    df = pd.get_dummies(df, columns=['shotType'])

    # generate absolute value of y
    df['ySquared'] = df['y']**2
    
    # convert playerId columns to int
    df['blockerId'] = df['blockerId'].astype('Int64')
    df['goalieId'] = df['goalieId'].astype('Int64')

    return df

In [56]:
def fill_in_missing_shot_type_columns(df):
    shot_type_cols = [
        'shotType_backhand', 'shotType_deflected', 'shotType_slap', 'shotType_snap',
        'shotType_tip-in', 'shotType_wrist', 'shotType_wrap-around', 'shotType_poke',
        'shotType_bat', 'shotType_between-legs', 'shotType_cradle'
    ]
    for shot_type in shot_type_cols:
        if shot_type not in df.columns:
            df[shot_type] = 0
    return df

def normalize_features(df):
    df['x'] = df['x'] / 100
    df['y'] = df['y'] / 42.5
    df['ySquared'] = df['ySquared'] / 1764
    df['totalShots'] = df['totalShots'] / 100
    df['shotAngle'] = df['shotAngle'] / 180
    df['shotAngleDiff'] = df['shotAngleDiff'] / 360
    df['shotDist'] = df['shotDist'] / 100
    df['shotDistDiff'] = df['shotDistDiff'] / 100
    df['timeSecondsDiff'] = np.where(df['timeSecondsDiff']>300, 300, df['timeSecondsDiff'])
    df['timeSecondsDiff'] = df['timeSecondsDiff'] / 300
    return df

def convert_to_numpy_array(df):
    FEATURES_YSQUARED =\
        ['x', 'ySquared', 'timeSecondsDiff', 'totalShots', 'shotAngle',
         'shotAngleDiff', 'goalieMoveRight', 'shotDist', 'shotDistDiff',
         'shotType_backhand', 'shotType_deflected', 'shotType_slap', 'shotType_snap',
         'shotType_tip-in', 'shotType_wrist', 'shotType_wrap-around', 'shotType_poke',
         'shotType_bat', 'shotType_between-legs', 'shotType_cradle']
    return np.asarray(df[FEATURES_YSQUARED]).astype(np.float32)

In [61]:
# read in shots
df_shots = pd.read_csv('shotsSample.csv', index_col=0)

# transform
df = transform_shot_data(df_shots)
df = fill_in_missing_shot_type_columns(df)
df = normalize_features(df)
data_with_ySquared = convert_to_numpy_array(df)

# predict
df_shots['pred'] = base_learner.predict(data_with_ySquared)



In [62]:
df_shots

Unnamed: 0,seasonId,gameId,time,period,periodTime,x,y,shooterId,blockerId,goalieId,teamId,isGoal,isBlocked,type,shot_type,pred
0,2023,2023030121,33,1,00:33,-83,-6,8475166,0,8480280,10,False,False,shot-on-goal,tip-in,0.127264
1,2023,2023030121,45,1,00:45,-65,-9,8482720,0,8480280,10,False,False,shot-on-goal,snap,0.185928
2,2023,2023030121,68,1,01:08,65,2,8477956,0,8478492,6,False,False,missed-shot,wrist,0.068461
3,2023,2023030121,82,1,01:22,-38,0,8476931,0,8480280,10,False,False,missed-shot,wrist,0.000000
4,2023,2023030121,89,1,01:29,-81,5,8481582,0,8480280,10,False,False,shot-on-goal,tip-in,0.043830
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,2023,2023030121,3503,3,18:23,-85,-20,8480980,0,8480280,10,False,False,shot-on-goal,wrist,0.086123
112,2023,2023030121,3543,3,19:03,-72,35,8480043,0,8480280,10,False,False,missed-shot,wrist,0.039663
113,2023,2023030121,3586,3,19:46,-57,-2,8477503,8479369,0,10,False,True,blocked-shot,unknown,0.220615
114,2023,2023030121,3588,3,19:48,-78,-27,8477503,0,8480280,10,False,False,shot-on-goal,wrist,0.061024


In [63]:
pred = base_learner.predict(data_with_ySquared)



In [64]:
len(pred)

116

In [69]:
df_shots[df_shots['type'].isin(['shot-on-goal', 'goal'])][['teamId', 'pred']]\
    .groupby('teamId').sum()

Unnamed: 0_level_0,pred
teamId,Unnamed: 1_level_1
6,2.911648
10,4.388354


In [68]:
df_shots[df_shots['type'].isin(['shot-on-goal', 'goal'])]

Unnamed: 0,seasonId,gameId,time,period,periodTime,x,y,shooterId,blockerId,goalieId,teamId,isGoal,isBlocked,type,shot_type,pred
0,2023,2023030121,33,1,00:33,-83,-6,8475166,0,8480280,10,False,False,shot-on-goal,tip-in,0.127264
1,2023,2023030121,45,1,00:45,-65,-9,8482720,0,8480280,10,False,False,shot-on-goal,snap,0.185928
4,2023,2023030121,89,1,01:29,-81,5,8481582,0,8480280,10,False,False,shot-on-goal,tip-in,0.043830
5,2023,2023030121,90,1,01:30,-78,0,8481582,0,8480280,10,False,False,shot-on-goal,wrist,0.040822
6,2023,2023030121,146,1,02:26,71,10,8481556,0,8478492,6,True,False,goal,snap,0.278249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,2023,2023030121,3204,3,13:24,-77,-2,8477479,0,8480280,10,False,False,shot-on-goal,tip-in,0.223714
109,2023,2023030121,3433,3,17:13,-29,-17,8476931,0,8480280,10,False,False,shot-on-goal,wrist,0.257372
110,2023,2023030121,3472,3,17:52,42,18,8479365,0,0,6,True,False,goal,wrist,0.045068
111,2023,2023030121,3503,3,18:23,-85,-20,8480980,0,8480280,10,False,False,shot-on-goal,wrist,0.086123
