# Building an Interpretable Expected Goals (xG) Model
* Author: Oliver Mueller
* Date: 2024-02-15

In [30]:
import warnings
# Silence every warning for the rest of the session so that library notices
# do not clutter the cell outputs.
# NOTE(review): this blanket filter also hides potentially relevant warnings
# from the libraries imported below — consider narrowing it to specific
# categories.
warnings.filterwarnings("ignore")

import datetime
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
from statsbombpy import sb
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score, auc, RocCurveDisplay
from sklearn.metrics import classification_report
import statsmodels.api as sm
import statsmodels.formula.api as smf

## Load data

Using the `statsbombpy` package, we will query the StatsBomb API for all events of the 2015/16 Bundesliga season.

In [2]:
# Query StatsBomb for every event of the 2015/16 men's 1. Bundesliga season.
# split=True returns the events grouped by event type, so individual event
# types (e.g. "shots") can be looked up by key.
grouped_events = sb.competition_events(
    country="Germany", division="1. Bundesliga",
    season="2015/2016", gender="male",
    split=True,
)



What types of events do we have?

In [3]:
# Each key is the name of one event type that the events were grouped by.
grouped_events.keys()

dict_keys(['starting_xis', 'half_starts', 'passes', 'ball_receipts', 'carrys', 'pressures', 'blocks', 'ball_recoverys', 'miscontrols', 'interceptions', 'foul_committeds', 'foul_wons', 'shots', 'goal_keepers', 'duels', 'dribbles', 'dribbled_pasts', 'clearances', 'dispossesseds', 'tactical_shifts', 'injury_stoppages', 'player_offs', 'player_ons', 'half_ends', 'substitutions', 'shields', 'own_goal_againsts', 'own_goal_fors', '50/50s', 'referee_ball_drops', 'bad_behaviours', 'offsides', 'errors'])

Extract and explore the shots.

In [4]:
# Pull the shot events out of the grouped result for further inspection.
shots = grouped_events["shots"]

In [5]:
# (number of shots, number of attributes recorded per shot)
shots.shape

(7831, 40)

In [6]:
# Preview the first few shots to get a feel for the table layout.
shots.head()

Unnamed: 0,id,index,period,timestamp,minute,second,type,possession,possession_team,play_pattern,...,shot_aerial_won,shot_one_on_one,shot_deflected,out,shot_open_goal,shot_redirect,shot_saved_off_target,shot_saved_to_post,off_camera,shot_follows_dribble
0,ba46e9d6-e828-4599-952c-39c1f7d22659,263,1,00:04:21.052,4,21,Shot,7,Hoffenheim,Regular Play,...,,,,,,,,,,
1,85d67225-30fb-47c8-b478-cf568941a164,353,1,00:06:27.395,6,27,Shot,11,Schalke 04,From Throw In,...,,,,,,,,,,
2,adac17d3-5e67-4e8c-b482-4bae2f36e06e,410,1,00:08:16.762,8,16,Shot,15,Hoffenheim,From Throw In,...,,,,,,,,,,
3,abffd193-62bc-4c8d-8636-1e3f0f0ebbe5,624,1,00:13:55.721,13,55,Shot,27,Schalke 04,From Counter,...,,,,,,,,,,
4,d9cea903-f92a-40e1-a393-1a849d83f157,749,1,00:17:16.953,17,16,Shot,33,Schalke 04,Regular Play,...,,,,,,,,,,


In [7]:
# List every attribute (column) available for a shot.
shots.columns

Index(['id', 'index', 'period', 'timestamp', 'minute', 'second', 'type',
       'possession', 'possession_team', 'play_pattern', 'team', 'player',
       'position', 'location', 'duration', 'related_events', 'match_id',
       'shot_statsbomb_xg', 'shot_end_location', 'shot_key_pass_id',
       'shot_type', 'shot_outcome', 'shot_first_time', 'shot_technique',
       'shot_body_part', 'shot_freeze_frame', 'possession_team_id', 'team_id',
       'player_id', 'under_pressure', 'shot_aerial_won', 'shot_one_on_one',
       'shot_deflected', 'out', 'shot_open_goal', 'shot_redirect',
       'shot_saved_off_target', 'shot_saved_to_post', 'off_camera',
       'shot_follows_dribble'],
      dtype='object')

In [8]:
# Show the first shot as a single record so that all of its attributes are
# visible at once (the tabular preview truncates the middle columns).
shots.iloc[0]

id                                    ba46e9d6-e828-4599-952c-39c1f7d22659
index                                                                  263
period                                                                   1
timestamp                                                     00:04:21.052
minute                                                                   4
second                                                                  21
type                                                                  Shot
possession                                                               7
possession_team                                                 Hoffenheim
play_pattern                                                  Regular Play
team                                                            Hoffenheim
player                                                   Tarik Elyounoussi
position                                                    Left Wing Back
location                 

In [9]:
# Freeze frame of the first shot: a list with one entry per recorded player,
# each holding a location, the player, the position and a teammate flag.
shots["shot_freeze_frame"].iloc[0]

[{'location': [115.7, 50.0],
  'player': {'id': 3510, 'name': 'Sead Kolašinac'},
  'position': {'id': 6, 'name': 'Left Back'},
  'teammate': False},
 {'location': [113.2, 39.3],
  'player': {'id': 3502, 'name': 'Joël Andre Job Matip'},
  'position': {'id': 3, 'name': 'Right Center Back'},
  'teammate': False},
 {'location': [119.0, 42.5],
  'player': {'id': 16534, 'name': 'Roman Neustädter'},
  'position': {'id': 5, 'name': 'Left Center Back'},
  'teammate': False},
 {'location': [111.6, 34.8],
  'player': {'id': 7016, 'name': 'Benedikt Höwedes'},
  'position': {'id': 2, 'name': 'Right Back'},
  'teammate': False},
 {'location': [96.7, 36.3],
  'player': {'id': 6039, 'name': 'Sebastian Rudy'},
  'position': {'id': 11, 'name': 'Left Defensive Midfield'},
  'teammate': True},
 {'location': [104.0, 43.8],
  'player': {'id': 8556, 'name': 'Pirmin Schwegler'},
  'position': {'id': 9, 'name': 'Right Defensive Midfield'},
  'teammate': True},
 {'location': [109.2, 37.7],
  'player': {'id': 54

In [10]:
# Count how often each shot outcome occurs, most frequent first.
shots["shot_outcome"].value_counts()

shot_outcome
Off T               2452
Saved               1961
Blocked             1820
Goal                 837
Wayward              550
Post                 160
Saved to Post         35
Saved Off Target      16
Name: count, dtype: int64