In [1]:
from mplsoccer.pitch import Pitch, VerticalPitch
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from highlight_text import HighlightText, ax_text, fig_text
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from scipy.ndimage import gaussian_filter
import matplotlib.patheffects as path_effects

In [2]:
# Load the event table from disk.
data = pd.read_csv("Serie_A_22_23.csv", low_memory=False)

# Rescale the start/end coordinates: horizontal values by 1.2, vertical by 0.8.
# NOTE(review): presumably maps a 100x100 coordinate grid onto a 120x80 pitch — confirm.
for column, factor in (('x', 1.2), ('y', .8), ('endX', 1.2), ('endY', .8)):
    data[column] = data[column] * factor

In [3]:
def add_carries(game_df, min_dribble_length: float = 0.0,
                max_dribble_length: float = 100.0,
                max_dribble_duration: float = 20.0):
    """Insert synthetic 'Carry' events between consecutive same-team events.

    For every pair of consecutive rows (previous, next) a carry is created when
    both rows share matchId, teamId and period, the next row starts less than
    ``max_dribble_duration`` seconds after the previous one, and the distance
    between the previous row's end point and the next row's start point lies in
    ``[min_dribble_length, max_dribble_length]``.

    The carry starts at the previous row's (endX, endY), ends at the next row's
    (x, y), and takes its player/team columns from the next row.

    Parameters
    ----------
    game_df : pandas.DataFrame
        Event table in chronological row order. The columns read are: minute,
        second, matchId, teamId, period, x, y, endX, endY, eventId,
        expandedMinute, playerId, playerName, team_name and h_a.
    min_dribble_length, max_dribble_length : float
        Inclusive distance bounds, in the same units as the coordinate columns.
    max_dribble_duration : float
        Exclusive upper bound, in seconds, on the gap between the two rows.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original rows plus the carries, sorted by
        (matchId, period, action_id), with a fresh 0..n-1 index, an added
        ``time_seconds`` column and ``action_id`` renumbered 0..n-1.
        The input frame is not modified.
    """
    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    game_df = game_df.assign(
        time_seconds=game_df['minute'] * 60 + game_df['second']
    )

    # Row i of next_actions is row i + 1 of the frame being processed. This must
    # be derived from game_df itself, not from a notebook-level global.
    next_actions = game_df.shift(-1)

    same_game = game_df.matchId == next_actions.matchId
    same_team = game_df.teamId == next_actions.teamId
    same_period = game_df.period == next_actions.period

    # Squared gap between where the previous row ended and the next one started.
    dx = game_df.endX - next_actions.x
    dy = game_df.endY - next_actions.y
    distance_sq = dx ** 2 + dy ** 2
    far_enough = distance_sq >= min_dribble_length ** 2
    not_too_far = distance_sq <= max_dribble_length ** 2

    dt = next_actions.time_seconds - game_df.time_seconds
    same_phase = dt < max_dribble_duration

    dribble_idx = (same_team & far_enough & not_too_far
                   & same_phase & same_period & same_game)

    prev = game_df[dribble_idx]
    nex = next_actions[dribble_idx]

    dribbles = pd.DataFrame(
        {
            'matchId': nex.matchId,
            'period': nex.period,
            'expandedMinute': nex.expandedMinute,
            # NOTE(review): every carry is flagged as a key pass and an assist,
            # which looks unintended — kept as-is to preserve existing output.
            'passKey': True,
            'assist': True,
            'isTouch': True,
            'playerId': nex.playerId,
            'team_name': nex.team_name,
            'playerName': nex.playerName,
            'h_a': nex.h_a,
            # The +0.1 offset is meant to order the carry right after the row
            # that precedes it.
            # NOTE(review): this relies on game_df already carrying an
            # 'action_id' column comparable to eventId; if it does not, the
            # original rows have NaN here and sort after the carries — confirm.
            'action_id': prev.eventId + 0.1,
            'time_seconds': (prev.time_seconds + nex.time_seconds) / 2,
            'teamId': nex.teamId,
            'x': prev.endX,
            'y': prev.endY,
            'endX': nex.x,
            'endY': nex.y,
            'type': 'Carry',
            'outcomeType': 'Successful',
            # One distinct dict per row so the rows never share a mutable object.
            'qualifiers': [{} for _ in range(len(prev))],
        },
        index=prev.index,
    )

    combined = pd.concat([game_df, dribbles], ignore_index=True, sort=False)
    combined = (combined
                .sort_values(['matchId', 'period', 'action_id'])
                .reset_index(drop=True))
    combined['action_id'] = range(len(combined))
    return combined

In [4]:
# Replace the event table with a version that also contains the inferred carries.
# Not idempotent: re-running this cell passes the already-augmented table back
# into add_carries, so re-run from the load cell instead.
data = add_carries(data)

In [5]:
# Keep only the carry rows. The explicit copy makes this an independent frame:
# later cells add columns to it, and writing to a plain filtered slice of
# `data` raises pandas' SettingWithCopyWarning.
data_carries = data[data['type'] == 'Carry'].copy()

In [6]:
# Split take-on attempts by their recorded outcome.
is_take_on = data['type'].eq('TakeOn')
data['take_on_won'] = is_take_on & data['outcomeType'].eq('Successful')
data['take_on_lost'] = is_take_on & data['outcomeType'].eq('Unsuccessful')

In [7]:
# Flag every ball-recovery event.
data['ball_won'] = data['type'].eq('BallRecovery')

In [8]:
# Zone 14 bounds, expressed in the same units as the x / y columns.
zone14_x_min = 78
zone14_x_max = 78 + 24
zone14_y_min = 30
zone14_y_max = 30 + 20


def _zone14_flags(events):
    """Return the ``(in_zone, into_zone)`` boolean Series for ``events``.

    in_zone   -- the event starts inside the Zone 14 box (bounds inclusive).
    into_zone -- the event ends inside the box but did not start inside it.
    """
    in_zone = (events['x'].between(zone14_x_min, zone14_x_max)
               & events['y'].between(zone14_y_min, zone14_y_max))
    ends_in_zone = (events['endX'].between(zone14_x_min, zone14_x_max)
                    & events['endY'].between(zone14_y_min, zone14_y_max))
    return in_zone, ends_in_zone & ~in_zone


# Flag every event in the full event table.
data['in_zoneF'], data['into_zoneF'] = _zone14_flags(data)

# Flag the carries. assign() builds a new frame rather than writing into what
# may be a filtered slice of `data`, which avoids SettingWithCopyWarning.
carry_in_zone, carry_into_zone = _zone14_flags(data_carries)
data_carries = data_carries.assign(in_zoneF=carry_in_zone,
                                   into_zoneF=carry_into_zone)

# Take-ons and ball recoveries that start inside Zone 14.
data['take_on_won_z14'] = data['take_on_won'] & data['in_zoneF']
data['take_on_lost_z14'] = data['take_on_lost'] & data['in_zoneF']
data['ball_won_z14'] = (data['type'] == 'BallRecovery') & data['in_zoneF']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_carries['in_zoneF'] = (data_carries['x'].between(zone14_x_min, zone14_x_max) &
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_carries['into_zoneF'] = (data_carries['endX'].between(zone14_x_min, zone14_x_max) &


In [9]:
# Per-player totals of the Zone 14 flags; summing a boolean column counts its
# True rows.
player_keys = ['playerId', 'playerName', 'team_name']
z14_flag_columns = ['in_zoneF', 'into_zoneF', 'take_on_won_z14',
                    'take_on_lost_z14', 'ball_won_z14']
actions_z14 = (
    data.groupby(player_keys)
    .agg(**{column: (column, 'sum') for column in z14_flag_columns})
    .reset_index()
)

# Percentage of Zone 14 take-ons that were won. Players with no attempts get
# NaN from the 0 / 0 division.
take_ons_won = actions_z14['take_on_won_z14']
take_ons_attempted = take_ons_won + actions_z14['take_on_lost_z14']
actions_z14['take_on_%'] = (take_ons_won / take_ons_attempted) * 100


In [10]:
# Display the per-player Zone 14 action table (the rendered output is truncated).
actions_z14

Unnamed: 0,playerId,playerName,team_name,in_zoneF,into_zoneF,take_on_won_z14,take_on_lost_z14,ball_won_z14,take_on_%
0,3281.0,Zlatan Ibrahimovic,AC Milan,30,3,0,1,0,0.0
1,9016.0,Franck Ribéry,Salernitana,1,1,0,1,0,0.0
2,9730.0,Fabio Quagliarella,Sampdoria,83,17,0,1,3,0.0
3,9972.0,Samir Handanovic,Inter,0,11,0,0,0,
4,11895.0,Adama Soumaoro,Bologna,13,19,0,0,1,
...,...,...,...,...,...,...,...,...,...
598,469986.0,Jordan Majchrzak,Roma,1,1,0,0,0,
599,474446.0,Mihailo Ivanovic,Sampdoria,0,0,0,0,0,
600,476440.0,Niccolò Pisilli,Roma,0,0,0,0,0,
601,477978.0,Alberto Basso,Cremonese,1,0,0,0,0,


In [11]:
# Per-player counts of carries that start in, or move into, Zone 14.
carry_keys = ['playerId', 'playerName', 'team_name']
carry_flag_columns = ['in_zoneF', 'into_zoneF']
carries_z14 = (
    data_carries.groupby(carry_keys)
    .agg(**{column: (column, 'sum') for column in carry_flag_columns})
    .reset_index()
)

In [12]:
# Display the per-player Zone 14 carry table (the rendered output is truncated).
carries_z14

Unnamed: 0,playerId,playerName,team_name,in_zoneF,into_zoneF
0,3281.0,Zlatan Ibrahimovic,AC Milan,12,1
1,9016.0,Franck Ribéry,Salernitana,0,1
2,9730.0,Fabio Quagliarella,Sampdoria,31,3
3,9972.0,Samir Handanovic,Inter,0,0
4,11895.0,Adama Soumaoro,Bologna,1,3
...,...,...,...,...,...
593,464210.0,Lukas Vorlicky,Atalanta,4,0
594,469926.0,Flavio Paoletti,Sampdoria,6,2
595,469986.0,Jordan Majchrzak,Roma,0,0
596,477978.0,Alberto Basso,Cremonese,0,0


In [13]:
# Column selections for the display tables, composed from shared pieces.
_id_cols = ['playerName', 'team_name']
_zone_cols = ['in_zoneF', 'into_zoneF']

cols1 = _id_cols + _zone_cols + ['take_on_won_z14', 'take_on_%', 'ball_won_z14']
cols2 = _id_cols + _zone_cols
cols3 = _id_cols + ['ball_won_z14']

In [14]:
# Narrow the per-player action table to the columns chosen in cols1.
z14_actions_disp = actions_z14.loc[:, cols1]

In [15]:
# Display the narrowed action table (the rendered output is truncated).
z14_actions_disp

Unnamed: 0,playerName,team_name,in_zoneF,into_zoneF,take_on_won_z14,take_on_%,ball_won_z14
0,Zlatan Ibrahimovic,AC Milan,30,3,0,0.0,0
1,Franck Ribéry,Salernitana,1,1,0,0.0,0
2,Fabio Quagliarella,Sampdoria,83,17,0,0.0,3
3,Samir Handanovic,Inter,0,11,0,,0
4,Adama Soumaoro,Bologna,13,19,0,,1
...,...,...,...,...,...,...,...
598,Jordan Majchrzak,Roma,1,1,0,,0
599,Mihailo Ivanovic,Sampdoria,0,0,0,,0
600,Niccolò Pisilli,Roma,0,0,0,,0
601,Alberto Basso,Cremonese,1,0,0,,0


In [18]:
# Rank players by successful Zone 14 take-ons, highest first, and show the top rows.
z14_actions_disp = z14_actions_disp.sort_values(by='take_on_won_z14', ascending=False)
z14_actions_disp.head(20)

Unnamed: 0,playerName,team_name,in_zoneF,into_zoneF,take_on_won_z14,take_on_%,ball_won_z14
284,Lautaro Martínez,Inter,328,64,9,56.25,7
303,Luka Jovic,Fiorentina,149,22,8,88.888889,4
391,André-Frank Zambo Anguissa,Napoli,299,104,7,87.5,15
388,Rafael Leão,AC Milan,181,76,7,58.333333,2
301,David Okereke,Cremonese,169,41,7,70.0,1
96,Felipe Anderson,Lazio,245,67,7,53.846154,4
262,Sergej Milinkovic-Savic,Lazio,341,79,6,42.857143,6
473,Lazar Samardzic,Udinese,197,84,6,42.857143,3
456,Beto,Udinese,181,34,6,42.857143,4
216,Lorenzo Pellegrini,Roma,236,58,6,50.0,4


In [17]:
# Narrow the carry table to the cols2 columns, then show it ranked by carries
# into Zone 14 (highest first). The sorted frame is displayed only, not stored.
z14_carries = carries_z14.loc[:, cols2]
z14_carries.sort_values(by='into_zoneF', ascending=False)

Unnamed: 0,playerName,team_name,in_zoneF,into_zoneF
133,Stanislav Lobotka,Napoli,126,38
341,Davide Frattesi,Sassuolo,81,33
472,Lazar Samardzic,Udinese,63,32
387,Rafael Leão,AC Milan,59,32
308,Brahim Díaz,AC Milan,65,30
...,...,...,...,...
154,Pawel Dawidowicz,Verona,2,0
153,Charalampos Lykogiannis,Bologna,5,0
152,Ivan Provedel,Lazio,0,0
448,Szymon Zurkowski,Fiorentina,4,0
