# Installation

kloppy can be installed using `pip install kloppy`. This will install the latest version of kloppy.

In [None]:
# Run pip through the kernel's own interpreter (sys.executable) so kloppy is
# installed into the same environment this notebook imports from.
import sys
!{sys.executable} -m pip install kloppy

# Loading data


In [None]:
from kloppy import load_sportec_event_data
from kloppy.domain import EventType, ShotResult, Orientation


import os
from pathlib import Path

# Directory that holds the Sportec XML files. It defaults to the location this
# notebook was originally written against; set the KLOPPY_DATA_DIR environment
# variable to run the notebook elsewhere without editing this cell.
DATA_DIR = Path(os.environ.get("KLOPPY_DATA_DIR", "/Users/koen/Dropbox/PySport/kloppy-dev"))

event_data_path = DATA_DIR / "Eventdata_DFL-MAT-003BN1.xml"
match_info_path = DATA_DIR / "Match_Infos_DFL-MAT-003BN1.xml"

# Fail early with an actionable message rather than an error raised from
# inside the loader.
for required_path in (event_data_path, match_info_path):
    if not required_path.is_file():
        raise FileNotFoundError(
            f"{required_path} not found; set KLOPPY_DATA_DIR to the directory "
            "containing the Sportec XML files"
        )

# The loader receives plain strings, exactly as before.
dataset = load_sportec_event_data(
    str(event_data_path),
    str(match_info_path),
)

# The notebook treats the first team in the metadata as the home side and the
# second as the away side.
home_team, away_team = dataset.metadata.teams

# Keep only the shots taken by the home team.
shot_dataset = dataset.filter(
    lambda event: event.event_type == EventType.SHOT and event.team == home_team
)



In [None]:
# Show which team was unpacked as the home side; the shot filter above
# selects this team's shots.
print(home_team)

# Plotting the data

- Install mplsoccer package
- Plot shots of Freiburg

In [None]:
# Install the plotting dependencies with the kernel's interpreter; relies on
# `sys` having been imported by the installation cell at the top.
# NOTE(review): seaborn is installed here but not imported in the visible
# cells — confirm it is still needed.
!{sys.executable} -m pip install mplsoccer seaborn

In [None]:
from mplsoccer.pitch import Pitch
import matplotlib.pyplot as plt
#plt.style.use('ggplot')

# Sanity check of the plotting setup: put one coloured marker on each corner
# of a 105 x 68 area to see how the pitch axes are laid out.
# NOTE(review): recent mplsoccer releases appear to take `figsize` in
# `pitch.draw()` rather than in `Pitch()` — confirm against the installed version.
corner_x = [0, 105, 105, 0]
corner_y = [68, 0, 68, 0]
corner_colors = ['red', 'blue', 'green', 'yellow']

pitch = Pitch(pitch_type='uefa', figsize=(10, 8))
fig, ax = pitch.draw()
sc = pitch.scatter(
    corner_x,
    corner_y,
    c=corner_colors,
    s=200,
    label='scatter',
    ax=ax,
)

In [None]:


# Gather the (x, y) location of every event in shot_dataset, then split the
# pairs into one sequence of x values and one of y values for plotting.
shot_locations = [
    (event.coordinates.x, event.coordinates.y)
    for event in shot_dataset.events
]
x, y = zip(*shot_locations)

# Draw the pitch and mark every shot in a single colour.
pitch = Pitch(pitch_type='uefa', figsize=(10, 8))
fig, ax = pitch.draw()
sc = pitch.scatter(x, y, c='blue', s=200, label='scatter', ax=ax)

In [None]:

# Select the home team's shots again so this cell can be read on its own.
shot_dataset = dataset.filter(
    lambda event: event.event_type == EventType.SHOT and event.team == home_team
)

# One (x, y, result) triple per shot, unzipped into three parallel sequences.
shot_rows = [
    (event.coordinates.x, event.coordinates.y, event.result)
    for event in shot_dataset.events
]
x, y, results = zip(*shot_rows)

# Goals are drawn in green, every other shot result in blue.
marker_colors = [
    'green' if result == ShotResult.GOAL else 'blue'
    for result in results
]

pitch = Pitch(pitch_type='uefa', figsize=(10, 8))
fig, ax = pitch.draw()
sc = pitch.scatter(x, y, c=marker_colors, s=200, label='scatter', ax=ax)

In [None]:
# Inspect the pitch dimensions recorded in the dataset metadata, to compare
# them with the 'uefa' pitch the plots are drawn on.
dataset.metadata.pitch_dimensions

# Preparing data


## Standardizing data

First step: correct the pitch dimensions to match the UEFA dimensions. This will stretch the x-axis.

In [None]:
# Rescale the shot coordinates to x in [0, 105] and y in [0, 68], the ranges
# used when plotting on the 'uefa' pitch.
shot_dataset_prepared = shot_dataset.transform(
    to_pitch_dimensions=[[0, 105], [0, 68]],
)

In [None]:
# Pull x, y and result out of every prepared shot as three parallel sequences.
prepared_rows = [
    (event.coordinates.x, event.coordinates.y, event.result)
    for event in shot_dataset_prepared.events
]
x, y, results = zip(*prepared_rows)

# Green marks a goal, blue any other shot result.
marker_colors = [
    'green' if result == ShotResult.GOAL else 'blue'
    for result in results
]

pitch = Pitch(pitch_type='uefa', figsize=(10, 8))
fig, ax = pitch.draw()
sc = pitch.scatter(x, y, c=marker_colors, s=200, label='scatter', ax=ax)

Second step: make sure all shots have the same orientation.

In [None]:
# Start again from the unprepared shots and apply both corrections in one
# transform: the 105 x 68 rescaling and a single orientation
# (Orientation.HOME_TEAM) for every shot.
shot_dataset_prepared = shot_dataset.transform(
    to_orientation=Orientation.HOME_TEAM,
    to_pitch_dimensions=[[0, 105], [0, 68]],
)

In [None]:
# Unzip the prepared shots into x positions, y positions and shot results.
oriented_rows = [
    (event.coordinates.x, event.coordinates.y, event.result)
    for event in shot_dataset_prepared.events
]
x, y, results = zip(*oriented_rows)

# Colour each marker by outcome: green for a goal, blue otherwise.
marker_colors = [
    'green' if result == ShotResult.GOAL else 'blue'
    for result in results
]

pitch = Pitch(pitch_type='uefa', figsize=(10, 8))
fig, ax = pitch.draw()
sc = pitch.scatter(x, y, c=marker_colors, s=200, label='scatter', ax=ax)

## Enriching data: adding state

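Enriching is the process of adding calculated data to each event. Here we add the match state at the moment of the event: the score, the sequence and the lineup.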

In [None]:
def _is_home_shot(event):
    """Return True for shot events taken by the home team."""
    return event.event_type == EventType.SHOT and event.team == home_team


# Order matters: lineup and sequence state need ALL events to be present, so
# state is added to the complete dataset first and the filter is applied after.
dataset_with_state = dataset.add_state('score', 'sequence', 'lineup')

shot_dataset_enriched = (
    dataset_with_state
    .filter(_is_home_shot)
    .transform(
        to_orientation=Orientation.HOME_TEAM,
        to_pitch_dimensions=[[0, 105], [0, 68]],
    )
)

# Look up one home-team player by id; the next cell checks whether this
# player is in the lineup at each shot.
substituted_player = home_team.get_player_by_id('DFL-OBJ-002FVJ')
print(substituted_player)

In [None]:
# One line per shot: the shooter (left-aligned, padded to 20 characters), the
# score stored in the shot's state, and whether substituted_player is in the
# lineup stored in the shot's state.
for shot in shot_dataset_enriched.events:
    shooter_label = str(shot.player)
    score_at_shot = shot.state['score']
    in_lineup = substituted_player in shot.state['lineup'].players
    print(f"{shooter_label:<20} - {score_at_shot} - {in_lineup}")

In [None]:
# Convert the enriched shots with to_pandas() and display the result as the
# cell output (default columns only).
shot_dataset_enriched.to_pandas()

In [None]:
# Extra columns computed per event: the score from the event's state and the
# player, both rendered as strings.
extra_columns = {
    'score': lambda event: str(event.state['score']),
    'player': lambda event: str(event.player),
}

dataframe = shot_dataset_enriched.to_pandas(additional_columns=extra_columns)

# Display only the columns of interest.
columns_to_show = ['player', 'score', 'event_type', 'result']
dataframe[columns_to_show]

## Playing time

In [None]:
from itertools import groupby
from collections import Counter

dataset_with_lineup = dataset.add_state('lineup')


def _period_and_players(event):
    """Grouping key: the event's period together with the players in its lineup state."""
    return event.period, event.state['lineup'].players


# Walk over runs of consecutive events that share the same period and lineup.
# Each run credits every player in that lineup with the time between the run's
# first and last event.
playing_time_per_player = Counter()
for (period, lineup), stint_events in groupby(dataset_with_lineup.events, key=_period_and_players):
    stint_events = list(stint_events)
    first_event, last_event = stint_events[0], stint_events[-1]
    length = last_event.timestamp - first_event.timestamp
    # Counter.update adds these values to the totals collected so far.
    playing_time_per_player.update(dict.fromkeys(lineup, length))

In [None]:
# Rank players from most to least playing time, then list the home-team
# players with their totals converted from seconds to minutes.
ranked_playing_time = sorted(
    playing_time_per_player.items(),
    key=lambda item: item[1],
    reverse=True,
)
for player, playing_seconds in ranked_playing_time:
    if player.team == home_team:
        print(f"{str(player):<20} {playing_seconds/60:02.0f} minutes")