# Pysoccer library

## 1. Import libraries

In [None]:
from pysoccer.attributes import *
from pysoccer.event import *
from pysoccer.serializers.WyscoutSerializer.eventSerializer import *
from pysoccer.serializers.WyscoutSerializer.matchSerializer import *
import json
from collections import defaultdict
from collections import Counter
import numpy as np
import operator
import matplotlib.pyplot as plt 
import pandas as pd
from pysoccer.visualization import createPitch

## 2. Load public dataset

We load the *matches* and *events* data sets from the data folder where we stored them earlier.

In [None]:
# Name substituted into the data file names below.
nation = 'Italy'

# JSON text is UTF-8; passing the encoding explicitly keeps decoding independent
# of the platform's default locale (e.g. cp1252 on Windows).
with open('./data/events/events_%s.json' % nation, encoding='utf-8') as json_data:
    events = json.load(json_data)
with open('./data/matches/matches_%s.json' % nation, encoding='utf-8') as json_data:
    matches = json.load(json_data)

We now create two dictionaries, one for *matches* and one for *events*. Each match is stored under its *wyId*, while events are grouped into a list under their *matchId*.

In [None]:
# Group the raw events into one list per match, keyed by the event's 'matchId'.
match_id2events = defaultdict(list)
for raw_event in events:
    match_id2events[raw_event['matchId']].append(raw_event)

# Index every raw match record by its own 'wyId'.
match_id2match = defaultdict(dict)
match_id2match.update((raw_match['wyId'], raw_match) for raw_match in matches)

## 3. Structure of data

### 3.A. Matches

In [None]:
# Use the first match id in the index as the running example for the notebook.
a_match = list(match_id2match)[0]
# Display the raw record stored for that match.
match_id2match[a_match]

### 3.B. Events

In [None]:
# Display the first raw event in the list stored for the sample match.
match_id2events[a_match][0]

## 4. Serializing

### 4.A. Serializing Events

We then create a new *WyscoutEventSerializer* object that will do the serialization for us. We call its *serialize* function, which takes a list of Wyscout events as input and returns a list of standardized events.

In [None]:
# Serializer that turns raw Wyscout events into the library's standardized events.
event_serializer = WyscoutEventSerializer()
# Serialize the sample match only.
# NOTE: this rebinds `events`, which until now held the raw list loaded from JSON.
events = event_serializer.serialize(match_id2events[a_match])

In *events* now we will find our list of standardized events. Below you can see how a standardized event looks.

In [None]:
# Display the first element of the serialized event list.
events[0]

We see that it is a *PossessionEvent*, precisely a pass. It was accurate and the ball passed from the player with id 3344 to the player with id 116349.

### 4.B. Serializing Matches

In the same way we did for events, we can create a new *WyscoutMatchSerializer* object that will serialize our matches. We call its *serialize* function, which takes as input a dict with Wyscout match ids as keys and Wyscout matches as values. The function returns a dictionary with the same keys and the serialized matches as values.

In [None]:
# Serializer for the raw match records.
match_serializer = WyscoutMatchSerializer()
# Serialize the whole id -> raw match index in one call.
# NOTE: this rebinds `matches`, which until now held the raw list loaded from JSON.
matches = match_serializer.serialize(match_id2match)

We can see that the value of *matches* for the *a_match* key is a standardized match that looks as follows.

In [None]:
# Display the serialized match stored under the sample match id.
matches[a_match]

It's the *Lazio - Internazionale* match, played in the last round of the 2018 Serie A.

## 5. Basic statistics

We can try to serialize all the events in the data set.

In [None]:
# Serialize the raw events of every match, keeping the match id as the key.
# A comprehension keeps the iteration variables local, so the builtin `id` is
# not shadowed and the notebook-level `events` list is not overwritten.
serialized_events = {
    match_id: event_serializer.serialize(raw_events)
    for match_id, raw_events in match_id2events.items()
}

### 5.A. Frequency of label per type

For each label we plot how frequent it is in the serialized data set.

In [None]:
# Collect, for every serialized event, the part of its label before the first "-".
events_list = [
    event.label.split("-")[0]
    for match_events in serialized_events.values()
    for event in match_events
]

# Share of each label over all events, truncated to a whole percentage.
count = Counter(events_list)
n_total = len(events_list)
counter = {
    label: int(float(n_events) / n_total * 100)
    for label, n_events in count.items()
}

# Sort ascending by share so the most frequent label is drawn at the top.
# Plain lists are used instead of a NumPy array: an array of (str, int) pairs
# would coerce the percentages to strings, and 2-D indexing fails when empty.
sorted_d = sorted(counter.items(), key=operator.itemgetter(1))
labels = [label for label, _ in sorted_d]
percentages = [share for _, share in sorted_d]

fig, ax = plt.subplots(figsize=(8, 6))
plt.barh(labels, percentages)
plt.xticks(rotation=90, fontsize=18)
plt.xlabel('events (%)', fontsize=25)
plt.yticks(fontsize=18)
plt.grid(alpha=0.3)
fig.tight_layout()
plt.show()

### 5.B. Distribution of the number of events per match

We can compute the number of events in each match. We then plot the distribution of this quantity.

In [None]:
# Number of serialized events in each match, with mean and standard deviation.
values = [len(match_events) for match_events in serialized_events.values()]
avg_n_events = np.mean(values)
std_n_events = np.std(values)

fig, ax = plt.subplots(figsize=(8, 6))
plt.hist(values, rwidth=0.9, bins=20)
plt.grid(alpha=0.3)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.xlabel('events (n)', fontsize=25)
plt.ylabel('frequency (n)', fontsize=25)
# The backslashes of the mathtext commands are escaped explicitly: "\m" and
# "\s" are invalid escape sequences in a normal string literal and trigger a
# warning on recent Python versions. "\n" stays a real newline.
plt.text(0.1, 0.7,
         '$\\mu$ = %i\n$\\sigma$ = %i' % (avg_n_events, std_n_events),
         fontsize=25, transform=ax.transAxes)  # position in axes coordinates
fig.tight_layout()
plt.show()

## 6. Plot events on the field

It is possible to create a *pandas DataFrame* with the standardized event's attributes. Let's take an Italian Serie A match and create a *DataFrame* for its events.

In [None]:
# Match used for the pitch plots below.
match_id = 2576335

# One dict per serialized event, as produced by the event's own to_dict().
match_events = [event.to_dict() for event in serialized_events[match_id]]

# Tabular view of the match; show the first rows.
match_df = pd.DataFrame(match_events)
match_df.head()

Then we can plot them on a pitch.

In [None]:
# Draw one dot per event at its start position: red for team 3161, blue for
# team 3162 (whose coordinates are mirrored onto the opposite half).
# NOTE(review): positions are presumably on a 0-100 scale and the pitch is
# 120 x 80 yards, hence the rescaling -- confirm against the library.
(fig, ax) = createPitch(120, 80, 'yards', 'gray')
for _, event in match_df.iterrows():
    start = event['start_position']
    if event['team_id'] == 3161:
        x = start.x * 120 / 100
        y = start.y * 80 / 100
        color = 'red'
    elif event['team_id'] == 3162:
        x = (100 - start.x) * 120 / 100
        y = (100 - start.y) * 80 / 100
        color = 'blue'
    else:
        # Skip events of any other team. Previously such a row re-added the
        # previous circle, or raised NameError if it came first.
        continue
    ax.add_patch(plt.Circle((x, y), 1, color=color))

plt.show()

If we want to visualize all the shots that the players took during the match, we can select them from the serialized events and plot them as well. To add something interesting, we also highlight the goals.

In [None]:
# Draw every shot of the match: red for team 3161, blue for team 3162.
# Shots that are not goals are drawn faded so that goals stand out.
(fig, ax) = createPitch(120, 80, 'yards', 'gray')

for event in serialized_events[match_id]:
    # Only the attribute reads are guarded: event types that lack the shot
    # attributes are skipped, as before, but errors raised while drawing are
    # no longer silently swallowed.
    try:
        if not event.is_shot:
            continue
        if event.team_id == 3161:
            # NOTE(review): 'R'/'L' presumably select the side of the pitch
            # the position is projected onto -- confirm in get_position.
            point = event.get_position(120, 80, 'R')
            color = 'red'
        elif event.team_id == 3162:
            point = event.get_position(120, 80, 'L')
            color = 'blue'
        else:
            # Skip shots of any other team. Previously this re-used the
            # previous circle, or raised NameError if none existed yet.
            continue
        is_goal = event.is_goal
    except AttributeError:
        continue

    shotCircle = plt.Circle((point.x, point.y), 2, color=color)
    if not is_goal:
        shotCircle.set_alpha(.2)
    ax.add_patch(shotCircle)

plt.show()
        

The chosen match is Lazio-Internazionale 3-2. Our plot is not incorrect: it shows only four goals because Ivan Perisic scored an own goal during the match, and an own goal is not a shot. It is also interesting to see the penalty shot that led to a goal.

In [None]:
# Per-match pass accuracy of team 3161, and how often each accuracy value occurs.
match_accurate = []
match_passes = []
# Dedicated loop names are used so the notebook-level `a_match` (the sample
# match picked earlier) is not overwritten.
for match_key, match_info in matches.items():
    # NOTE(review): match-level team ids are tested against the string '3161'
    # while event-level ids are compared with the int 3161 -- confirm that the
    # two serializers really use different types.
    if '3161' not in [match_info.home_team.team_id, match_info.away_team.team_id]:
        continue
    accurate = 0
    passes = 0
    for event in serialized_events[match_key]:
        if event.team_id == 3161 and event.label.split("-")[0] == 'Pass':
            passes += 1
            if event.outcome == 'success':
                accurate += 1
    match_passes.append(passes)
    match_accurate.append(accurate)

# Accuracy truncated to a whole percentage. Matches with no passes are left
# out: their accuracy is undefined and would raise ZeroDivisionError.
accurate_percentage = [
    int((n_accurate * 100) / n_passes)
    for n_accurate, n_passes in zip(match_accurate, match_passes)
    if n_passes
]

# Share of matches (whole percent) showing each accuracy value.
count = Counter(accurate_percentage)
n_matches = len(accurate_percentage)
counter = {
    percentage: int(float(n_percentages) / n_matches * 100)
    for percentage, n_percentages in count.items()
}
# A plain sorted list also works when no match qualified, whereas 2-D
# indexing of an empty NumPy array raises IndexError.
sorted_d = sorted(counter.items(), key=operator.itemgetter(1))

fig, ax = plt.subplots(figsize=(8, 6))
plt.barh([percentage for percentage, _ in sorted_d],
         [share for _, share in sorted_d])
plt.xticks(rotation=90, fontsize=18)
plt.xlabel('matches with that percentage (%)', fontsize=25)
plt.ylabel('accurate passes in a match (%)', fontsize=25)
plt.yticks(fontsize=18)
plt.grid(alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Per-match shot accuracy of team 3161, and how often each accuracy value occurs.
match_accurate_shots = []
match_shots = []
# Dedicated loop names are used so the notebook-level `a_match` (the sample
# match picked earlier) is not overwritten.
for match_key, match_info in matches.items():
    # NOTE(review): match-level team ids are tested against the string '3161'
    # while event-level ids are compared with the int 3161 -- confirm that the
    # two serializers really use different types.
    if '3161' not in [match_info.home_team.team_id, match_info.away_team.team_id]:
        continue
    accurate = 0
    shots = 0
    for event in serialized_events[match_key]:
        if event.team_id == 3161 and event.label.split("-")[0] == 'Shot':
            shots += 1
            if event.outcome == 'success':
                accurate += 1
    match_shots.append(shots)
    match_accurate_shots.append(accurate)

# Accuracy truncated to a whole percentage. Matches in which the team took no
# shot are left out: their accuracy is undefined and would raise
# ZeroDivisionError.
accurate_percentage = [
    int((n_accurate * 100) / n_shots)
    for n_accurate, n_shots in zip(match_accurate_shots, match_shots)
    if n_shots
]

# Share of matches (whole percent) showing each accuracy value.
count = Counter(accurate_percentage)
n_matches = len(accurate_percentage)
counter = {
    percentage: int(float(n_percentages) / n_matches * 100)
    for percentage, n_percentages in count.items()
}
# A plain sorted list also works when no match qualified, whereas 2-D
# indexing of an empty NumPy array raises IndexError.
sorted_d = sorted(counter.items(), key=operator.itemgetter(1))

fig, ax = plt.subplots(figsize=(8, 6))
plt.barh([percentage for percentage, _ in sorted_d],
         [share for _, share in sorted_d])
plt.xticks(rotation=90, fontsize=18)
plt.xlabel('matches with that percentage (%)', fontsize=25)
plt.ylabel('accurate shots in a match (%)', fontsize=25)
plt.yticks(fontsize=18)
plt.grid(alpha=0.3)
fig.tight_layout()
plt.show()