In [1]:
from mplsoccer.pitch import Pitch, VerticalPitch
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from highlight_text import HighlightText, ax_text, fig_text
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from scipy.ndimage import gaussian_filter
import matplotlib.patheffects as path_effects

In [2]:
data = pd.read_csv("epl_22_23.csv" , low_memory=False)
data['x'] = data['x']*1.2
data['y'] = data['y']*.8
data['endX'] = data['endX']*1.2
data['endY'] = data['endY']*.8

In [3]:
def add_carries(game_df):
   min_dribble_length: float = 0.0
   max_dribble_length: float = 100.0
   max_dribble_duration: float = 20.0
   game_df['time_seconds']=game_df['minute']*60+game_df['second']
   next_actions = data.shift(-1)
   same_game=game_df.matchId==next_actions.matchId
   same_team = game_df.teamId == next_actions.teamId
   dx = game_df.endX - next_actions.x
   dy = game_df.endY- next_actions.y
   far_enough=dy** 2 + dy** 2 >= min_dribble_length** 2
   not_too_far = dx ** 2+ dy** 2<= max_dribble_length ** 2
   dt = next_actions.time_seconds - game_df.time_seconds
   same_phase = dt < max_dribble_duration
   same_period = game_df.period == next_actions.period
   dribble_idx = same_team & far_enough & not_too_far & same_phase & same_period&same_game
   dribbles = pd.DataFrame()
   prev = game_df[dribble_idx]
   nex= next_actions[dribble_idx]
   dribbles['matchId' ] = nex.matchId
   dribbles['period' ] = nex.period
   for cols in ['expandedMinute']:
      dribbles[cols] = nex[cols]
   for cols in ['passKey','assist']:
      dribbles[ cols] = [True for _ in range (len (dribbles))]
   dribbles['isTouch'] = [True for _ in range (len (dribbles))]
   morecols = ['playerId', 'team_name','playerName','h_a']
   for cols in morecols:
      dribbles[cols] = nex[cols]
   dribbles['action_id'] = prev.eventId + 0.1
   dribbles['time_seconds' ] = (prev.time_seconds + nex.time_seconds) / 2
   dribbles['teamId'] = nex.teamId
   dribbles['playerId']= nex.playerId
   dribbles['playerName' ]= nex.playerName
   dribbles['x'] = prev.endX
   dribbles['y']= prev.endY
   dribbles['endX'] = nex.x
   dribbles['endY'] = nex.y
   dribbles['type'] = ['Carry' for _ in range(len(dribbles))]
   dribbles['outcomeType'] = ['Successful' for _ in range(len(dribbles))]
   dribbles['qualifiers']=[{} for _ in range(len(dribbles))]
   game_df=pd.concat([game_df,dribbles], ignore_index=True,sort=False)
   game_df=game_df.sort_values(['matchId','period','action_id']).reset_index(drop=True)
   game_df['action_id']=range(len(game_df))
   return game_df

In [4]:
data = add_carries(data)

In [5]:
data_carries = data[data['type'] == 'Carry']

In [6]:
data['take_on_won'] = (data['type'] == 'TakeOn') & (data['outcomeType'] == 'Successful')
data['take_on_lost'] = (data['type'] == 'TakeOn') & (data['outcomeType'] == 'Unsuccessful')

In [7]:
data['ball_won'] = (data['type'] == 'BallRecovery')

In [10]:
left_halfspace_x_min = 60
left_halfspace_x_max = 60 + 42
left_halfspace_y_min = 50
left_halfspace_y_max = 50 + 12
    
    
right_halfspace_x_min = 60
right_halfspace_x_max = 60 + 42
right_halfspace_y_min = 18
right_halfspace_y_max = 18 + 12


# Add 'in_rhs' column to data_succ_prog
data['in_rhs'] = (data['x'].between(right_halfspace_x_min, right_halfspace_x_max) & 
                       data['y'].between(right_halfspace_y_min, right_halfspace_y_max))

# Add 'in_lhs' column to data_succ_prog
data['in_lhs'] = (data['x'].between(left_halfspace_x_min, left_halfspace_x_max) & 
                       data['y'].between(left_halfspace_y_min, left_halfspace_y_max))

# Add 'into_rhs' column to data_succ_prog
data['into_rhs'] = (data['endX'].between(right_halfspace_x_min, right_halfspace_x_max) & 
                         data['endY'].between(right_halfspace_y_min, right_halfspace_y_max) & 
                         (data['in_rhs'] == False))

# Add 'into_lhs' column to data_succ_prog
data['into_lhs'] = (data['endX'].between(left_halfspace_x_min, left_halfspace_x_max) & 
                         data['endY'].between(left_halfspace_y_min, left_halfspace_y_max) & 
                         (data['in_lhs'] == False))

# Add 'in_rhs' column to data_carries_succ_prog
data_carries['in_rhs'] = (data_carries['x'].between(right_halfspace_x_min, right_halfspace_x_max) & 
                          data_carries['y'].between(right_halfspace_y_min, right_halfspace_y_max))

# Add 'in_lhs' column to data_carries_succ_prog
data_carries['in_lhs'] = (data_carries['x'].between(left_halfspace_x_min, left_halfspace_x_max) & 
                          data_carries['y'].between(left_halfspace_y_min, left_halfspace_y_max))

# Add 'into_rhs' column to data_carries_succ_prog
data_carries['into_rhs'] = (data_carries['endX'].between(right_halfspace_x_min, right_halfspace_x_max) & 
                            data_carries['endY'].between(right_halfspace_y_min, right_halfspace_y_max) & 
                            (data_carries['in_rhs'] == False))

# Add 'into_lhs' column to data_carries_succ_prog
data_carries['into_lhs'] = (data_carries['endX'].between(left_halfspace_x_min, left_halfspace_x_max) & 
                            data_carries['endY'].between(left_halfspace_y_min, left_halfspace_y_max) & 
                            (data_carries['in_lhs'] == False))


data['take_on_won_lhs'] = (data['take_on_won'] == True) & (data['in_lhs'] == True)
data['take_on_lost_lhs'] = (data['take_on_lost'] == True) & (data['in_lhs'] == True)
data['ball_won_lhs'] = (data['type'] == 'BallRecovery') & (data['in_lhs'] == True)

data['take_on_won_rhs'] = (data['take_on_won'] == True) & (data['in_rhs'] == True)
data['take_on_lost_rhs'] = (data['take_on_lost'] == True) & (data['in_rhs'] == True)
data['ball_won_rhs'] = (data['type'] == 'BallRecovery') & (data['in_rhs'] == True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_carries['in_rhs'] = (data_carries['x'].between(right_halfspace_x_min, right_halfspace_x_max) &
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_carries['in_lhs'] = (data_carries['x'].between(left_halfspace_x_min, left_halfspace_x_max) &
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_ca

In [12]:
actions_z14 = data.groupby(['playerId', 'playerName', 'team_name']).agg(
    in_lhs=('in_lhs', 'sum'),
    into_lhs=('into_lhs', 'sum'),
    take_on_won_lhs=('take_on_won_lhs', 'sum'),
    take_on_lost_lhs=('take_on_lost_lhs', 'sum'),
    ball_won_lhs=('ball_won_lhs', 'sum'),
    
    in_rhs=('in_rhs', 'sum'),
    into_rhs=('into_rhs', 'sum'),
    take_on_won_rhs=('take_on_won_rhs', 'sum'),
    take_on_lost_rhs=('take_on_lost_rhs', 'sum'),
    ball_won_rhs=('ball_won_rhs', 'sum')
).reset_index()


actions_z14['take_on_%_lhs'] = (actions_z14['take_on_won_lhs'] / (actions_z14['take_on_won_lhs'] + actions_z14['take_on_lost_lhs'])) * 100
actions_z14['take_on_%_rhs'] = (actions_z14['take_on_won_rhs'] / (actions_z14['take_on_won_rhs'] + actions_z14['take_on_lost_rhs'])) * 100


In [13]:
actions_z14

Unnamed: 0,playerId,playerName,team_name,in_lhs,into_lhs,take_on_won_lhs,take_on_lost_lhs,ball_won_lhs,in_rhs,into_rhs,take_on_won_rhs,take_on_lost_rhs,ball_won_rhs,take_on_%_lhs,take_on_%_rhs
0,4511.0,James Milner,Liverpool,76,25,2,0,4,65,52,0,1,1,100.0,0.000000
1,5583.0,Cristiano Ronaldo,Man Utd,67,13,0,1,0,43,10,0,0,2,0.0,
2,8166.0,Ashley Young,Aston Villa,25,35,0,0,0,89,87,0,0,4,,
3,13796.0,Theo Walcott,Southampton,9,7,0,0,1,74,26,1,2,1,,33.333333
4,16161.0,Joao Moutinho,Wolves,204,59,1,1,8,260,74,0,0,5,50.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,470690.0,Yasin Ayari,Brighton,5,3,0,0,0,8,0,0,0,0,,
565,474264.0,Lewis Miley,Newcastle,0,1,0,0,0,2,0,0,0,0,,
566,475756.0,Kamari Doyle,Southampton,0,0,0,0,0,1,0,0,0,0,,
567,479809.0,George Abbott,Tottenham,0,0,0,0,0,2,0,0,0,0,,


In [14]:
carries_z14 = data_carries.groupby(['playerId', 'playerName', 'team_name']).agg(
    in_lhs=('in_lhs', 'sum'),
    into_lhs=('into_lhs', 'sum'), 
    in_rhs=('in_rhs', 'sum'),
    into_rhs=('into_rhs', 'sum')
).reset_index()

In [15]:
carries_z14

Unnamed: 0,playerId,playerName,team_name,in_lhs,into_lhs,in_rhs,into_rhs
0,4511.0,James Milner,Liverpool,24,9,18,8
1,5583.0,Cristiano Ronaldo,Man Utd,24,4,17,5
2,8166.0,Ashley Young,Aston Villa,9,5,30,11
3,13796.0,Theo Walcott,Southampton,1,2,27,5
4,16161.0,Joao Moutinho,Wolves,76,13,98,16
...,...,...,...,...,...,...,...
561,470690.0,Yasin Ayari,Brighton,1,0,4,0
562,474264.0,Lewis Miley,Newcastle,0,0,1,0
563,475756.0,Kamari Doyle,Southampton,0,0,0,0
564,479809.0,George Abbott,Tottenham,0,0,1,0


In [23]:
cols1 = ['playerName' , 'team_name' , 'in_lhs' , 'into_lhs' , 'take_on_won_lhs' , 'take_on_%_lhs' , 'ball_won_lhs']
cols4 = ['playerName' , 'team_name' , 'in_rhs' , 'into_rhs' , 'take_on_won_rhs' , 'take_on_%_rhs' , 'ball_won_rhs']
cols2 = ['playerName' , 'team_name' , 'in_zoneF' , 'into_zoneF']
cols3= ['playerName' , 'team_name' , 'ball_won_lhs']

In [24]:
lhs_actions_disp = actions_z14[cols3]
rhs_actions_disp = actions_z14[cols4]

In [25]:
lhs_actions_disp

Unnamed: 0,playerName,team_name,ball_won_lhs
0,James Milner,Liverpool,4
1,Cristiano Ronaldo,Man Utd,0
2,Ashley Young,Aston Villa,0
3,Theo Walcott,Southampton,1
4,Joao Moutinho,Wolves,8
...,...,...,...
564,Yasin Ayari,Brighton,0
565,Lewis Miley,Newcastle,0
566,Kamari Doyle,Southampton,0
567,George Abbott,Tottenham,0


In [27]:
lhs_actions_disp = lhs_actions_disp.sort_values('ball_won_lhs' , ascending=False)
lhs_actions_disp.head(10)

Unnamed: 0,playerName,team_name,ball_won_lhs
380,Alexis Mac Allister,Brighton,30
300,Rodri,Man City,27
85,Granit Xhaka,Arsenal,22
182,Bruno Fernandes,Man Utd,21
64,Ilkay GÃ¼ndogan,Man City,19
349,Declan Rice,West Ham,18
363,Mathias Jensen,Brentford,18
163,Fabinho,Liverpool,17
256,Oleksandr Zinchenko,Arsenal,17
511,Moises Caicedo,Brighton,16


In [17]:
z14_carries = carries_z14[cols2]
z14_carries.sort_values('into_zoneF' , ascending=False)

Unnamed: 0,playerName,team_name,in_zoneF,into_zoneF
133,Stanislav Lobotka,Napoli,126,38
341,Davide Frattesi,Sassuolo,81,33
472,Lazar Samardzic,Udinese,63,32
387,Rafael Leão,AC Milan,59,32
308,Brahim Díaz,AC Milan,65,30
...,...,...,...,...
154,Pawel Dawidowicz,Verona,2,0
153,Charalampos Lykogiannis,Bologna,5,0
152,Ivan Provedel,Lazio,0,0
448,Szymon Zurkowski,Fiorentina,4,0
