## Intro to using StatsBomb data for match analysis

### Import the event data 

In [1]:
! pip3 install statsbombpy

[33mYou are using pip version 18.1, however version 21.3.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import statsbomb as sb

comps = sb.Competitions()
print(len(comps))  # 3
json_data = comps.data  # underlying json data

df = comps.get_dataframe()
print(df)

ModuleNotFoundError: No module named 'statsbomb'

In [3]:
import os

from setuptools import setup

with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), "README.md")) as f:
    README = f.read()

setup(
    name="statsbombpy",
    version="1.3.0",
    description="easily stream StatsBomb data into Python",
    long_description=README,
    long_description_content_type="text/markdown",
    url="https://github.com/statsbomb/statsbombpy",
    download_url="https://github.com/statsbomb/statsbombpy/archive/v1.0.tar.gz",
    author="StatsBomb",
    author_email="support@statsbombservices.com",
    packages=["statsbombpy"],
    install_requires=[
        "joblib",
        "inflect",
        "nose2",
        "pandas",
        "requests",
        "requests-cache",
    ],
)

NameError: name '__file__' is not defined

Collecting statsbombpy
  Downloading https://files.pythonhosted.org/packages/27/c0/dad16b0c72ddc3ddbaa10d76e682f2057cb5ce7b954346c2004c2d3c6aa8/statsbombpy-1.3.0-py3-none-any.whl
Collecting joblib (from statsbombpy)
[?25l  Downloading https://files.pythonhosted.org/packages/3e/d5/0163eb0cfa0b673aa4fe1cd3ea9d8a81ea0f32e50807b0c295871e4aab2e/joblib-1.1.0-py2.py3-none-any.whl (306kB)
[K    100% |████████████████████████████████| 307kB 3.9MB/s ta 0:00:011
[?25hCollecting requests (from statsbombpy)
[?25l  Downloading https://files.pythonhosted.org/packages/2d/61/08076519c80041bc0ffa1a8af0cbd3bf3e2b62af10435d269a9d0f40564d/requests-2.27.1-py2.py3-none-any.whl (63kB)
[K    100% |████████████████████████████████| 71kB 5.5MB/s ta 0:00:01
[?25hCollecting inflect (from statsbombpy)
  Downloading https://files.pythonhosted.org/packages/4f/a8/031641ad73a1bd1a9932261a6193864556172b333dde263fed8b5a0940cf/inflect-5.3.0-py3-none-any.whl
Collecting nose2 (from statsbombpy)
[?25l  Downloading htt

In [2]:
import pandas as pd
from statsbombpy import sb

sb.competition.head()

ModuleNotFoundError: No module named 'statsbombpy'

In [None]:
# Get the FIFA World Cup
competition[competition.competition_name == 'FIFA World Cup']

In [None]:
import json

with open('open-data/data/matches/43/3.json') as f:
    data = json.load(f)

data

In [None]:
with open('open-data/data/matches/43/3.json') as f:
    data = json.load(f)
    for i in data:
        print('ID:', i['match_id'], i['home_team']['home_team_name'], i['home_score'], '-', i['away_score'], i['away_team']['away_team_name'])

In [None]:
with open('open-data/data/events/7567.json') as f:
    korger = json.load(f)
    
korger

In [None]:
df = pd.json_normalize(korger, sep='_').assign(match_id="7567")
df.head()

In [None]:
shots = df[df.type_name == 'Shot'].set_index('id')
shots.head()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from FCPython import createPitch

pitch_width = 120
pitch_height = 80

fig, ax = createPitch(pitch_width, pitch_height, 'yards', 'gray')

home_team = 'South Korea'
away_team = 'Germany'

for i, shot in shots.iterrows():
    x = shot['location'][0]
    y = shot['location'][1]
    
    goal = shot['shot_outcome_name']=='Goal'
    team_name = shot['team_name']
    
    circle_size = 2
    circle_size = np.sqrt(shot['shot_statsbomb_xg'] * 15)
    
    if team_name == home_team:
        if goal:
            shot_circle = plt.Circle((x, pitch_height-y), circle_size, color='red')
            plt.text((x+1), pitch_height-y+1, shot['player_name'])
        else:
            shot_circle = plt.Circle((x, pitch_height-y), circle_size, color='red')
            shot_circle.set_alpha(.2)
    elif team_name == away_team:
        if goal:
            shot_circle = plt.Circle((pitch_width-x, y), circle_size, color='blue')
            plt.text((pitch_width-x+1), y+1, shot['player_name'])
        else:
            shot_circle = plt.Circle((pitch_width-x, y), circle_size, color='blue')
            shot_circle.set_alpha(.2)
    
    ax.add_patch(shot_circle)
    
plt.text(5, 75, away_team + ' shots')
plt.text(80, 75, home_team + ' shots')

plt.title('Germany vs South Korea at 2018 FIFA World Cup')

fig.set_size_inches(10, 7)
fig.savefig('korger_shots.png', dpi=300) 

plt.show()

In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import requests
from pandas import json_normalize
import numpy as np
from pitch import Pitch ##a helper function to quickly give us a pitch
import warnings

from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)


In [None]:
match_id = "8658"
side = "away"
color = "blue"
min_pass_count = 2 ##minimum number of passes for a link to be plotted

fig, ax = plt.subplots()
ax = Pitch(ax)


In [None]:
class Player:
    def __init__(self, player, df):
        self.id = player["player"]["id"]
        self.name = player["player"]["name"]
        self.average_position(df)

    def average_position(self, df):

        player_pass_df = df.query("(type_name == 'Pass') & (pass_type_name not in ['Free Kick', 'Corner', 'Throw-in', 'Kick Off']) & (player_id == @self.id) & (pass_outcome_name not in ['Unknown','Out','Pass Offside','Injury Clearance', 'Incomplete'])")
        self.x, self.y = np.mean(player_pass_df['location'].tolist(), axis=0)

        self.n_passes_completed = len(player_pass_df)


In [None]:
def load_file(match_id, getter="remote", path = None):
    """ """

    if getter == "local":
        with open(f"{path}/{match_id}.json", "r", encoding="utf-8") as f:
            match_dict = json.load(f)
            df = json_normalize(match_dict, sep="_")
            df = df.query("location == location")
            df[['x','y']] = pd.DataFrame(df.location.values.tolist(), index= df.index)
            df['y'] = 80 - df['y'] ##Reversing the y-axis co-ordinates because Statsbomb use this weird co-ordinate system
            df['location'] = df[['x', 'y']].apply(list, axis=1)

        return match_dict, df

    elif getter == "remote":
        resp = requests.get(f"https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/{match_id}.json")

        match_dict = json.loads(resp.text)
        df = json_normalize(match_dict, sep="_")
        df = df.query("location == location")
        df[['x','y']] = pd.DataFrame(df.location.values.tolist(), index= df.index)
        df['y'] = 80 - df['y'] ##Reversing the y-axis co-ordinates because Statsbomb use this reversed co-ordinate system
        df['location'] = df[['x', 'y']].apply(list, axis=1)

        return match_dict, df

In [None]:
def get_starters(match_dict, side="home"):
    """ """
    lineups = match_dict[0]["tactics"]["lineup"] if side == "home" else match_dict[1]["tactics"]["lineup"]
    return lineups


In [None]:
side_dict = {"home": match_dict[0]["team"]["name"],
             "away": match_dict[1]["team"]["name"] }

print(side_dict)
