In [1]:
from mplsoccer.pitch import Pitch, VerticalPitch
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from highlight_text import HighlightText, ax_text, fig_text
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from scipy.ndimage import gaussian_filter
import matplotlib.patheffects as path_effects

In [2]:
# Load the event table. low_memory=False makes pandas parse the file in a
# single pass so each column's dtype is inferred from all of its values.
data = pd.read_csv("epl_22_23.csv", low_memory=False)

# Rescale start/end coordinates: x-type columns by 1.2, y-type columns by 0.8.
# NOTE(review): presumably converts 0-100 provider coordinates to a 120x80
# pitch — confirm against the data source.
data[['x', 'endX']] = data[['x', 'endX']] * 1.2
data[['y', 'endY']] = data[['y', 'endY']] * .8

In [3]:
def add_carries(game_df,
                min_dribble_length: float = 0.0,
                max_dribble_length: float = 100.0,
                max_dribble_duration: float = 20.0):
    """Insert inferred 'Carry' events between consecutive same-team events.

    Each row is paired with the row that follows it. A carry is inferred for
    a pair when both rows share the same match, period and team, the gap
    between where the first event ended (``endX``/``endY``) and where the
    second one started (``x``/``y``) is within
    ``[min_dribble_length, max_dribble_length]``, and the second event
    happens less than ``max_dribble_duration`` seconds after the first.

    Parameters
    ----------
    game_df : pandas.DataFrame
        Event rows in playing order. Columns read: ``minute``, ``second``,
        ``matchId``, ``teamId``, ``period``, ``x``, ``y``, ``endX``, ``endY``,
        ``eventId``, ``expandedMinute``, ``playerId``, ``playerName``,
        ``team_name`` and ``h_a``. The frame is not modified.
    min_dribble_length, max_dribble_length : float
        Inclusive bounds on the carry distance, in the units of the
        coordinate columns.
    max_dribble_duration : float
        Exclusive upper bound, in seconds, on the time between the two events.

    Returns
    -------
    pandas.DataFrame
        The input rows (plus a ``time_seconds`` column) together with one
        ``type == 'Carry'`` row per inferred carry, sorted by ``matchId``,
        ``period`` and ``action_id``, with ``action_id`` renumbered
        ``0..n-1`` and a fresh index.
    """
    # Work on a new frame so the caller's DataFrame is left untouched.
    game_df = game_df.assign(
        time_seconds=game_df['minute'] * 60 + game_df['second']
    )

    # Row i of `next_actions` holds the event that follows row i of `game_df`.
    # The last row becomes all-NaN, so every comparison below is False for it.
    next_actions = game_df.shift(-1)

    same_game = game_df.matchId == next_actions.matchId
    same_team = game_df.teamId == next_actions.teamId
    same_period = game_df.period == next_actions.period

    # Squared distance between the end of one event and the start of the next.
    dx = game_df.endX - next_actions.x
    dy = game_df.endY - next_actions.y
    squared_distance = dx ** 2 + dy ** 2
    far_enough = squared_distance >= min_dribble_length ** 2
    not_too_far = squared_distance <= max_dribble_length ** 2

    dt = next_actions.time_seconds - game_df.time_seconds
    same_phase = dt < max_dribble_duration

    dribble_idx = (
        same_team & far_enough & not_too_far
        & same_phase & same_period & same_game
    )
    prev = game_df[dribble_idx]
    nex = next_actions[dribble_idx]

    # A carry starts where the previous event ended and ends where the next
    # one began; it is attributed to the player and team of the next event.
    # NOTE(review): every carry is flagged passKey/assist = True, exactly as
    # before — confirm this is intended before aggregating those columns.
    # NOTE(review): carries use eventId + 0.1 as their sort key, while the
    # existing rows keep whatever action_id the input has (NaN, which sorts
    # last, if the column is absent) — confirm the input provides an
    # action_id on the same scale as eventId.
    dribbles = pd.DataFrame(
        {
            'matchId': nex.matchId,
            'period': nex.period,
            'expandedMinute': nex['expandedMinute'],
            'passKey': True,
            'assist': True,
            'isTouch': True,
            'playerId': nex.playerId,
            'team_name': nex['team_name'],
            'playerName': nex.playerName,
            'h_a': nex['h_a'],
            'action_id': prev.eventId + 0.1,
            'time_seconds': (prev.time_seconds + nex.time_seconds) / 2,
            'teamId': nex.teamId,
            'x': prev.endX,
            'y': prev.endY,
            'endX': nex.x,
            'endY': nex.y,
            'type': 'Carry',
            'outcomeType': 'Successful',
            # One distinct empty dict per row (not a single shared object).
            'qualifiers': [{} for _ in range(len(nex))],
        },
        index=nex.index,
    )

    game_df = pd.concat([game_df, dribbles], ignore_index=True, sort=False)
    game_df = (
        game_df
        .sort_values(['matchId', 'period', 'action_id'])
        .reset_index(drop=True)
    )
    game_df['action_id'] = range(len(game_df))
    return game_df

In [4]:
# Add inferred 'Carry' rows to the event table.
# NOTE: not idempotent — running this cell again on the already-augmented
# frame would infer further carries; reload `data` before re-running.
data = add_carries(data)

In [5]:
# Carry events only. `.copy()` makes this an independent frame, so columns can
# be added to it later without pandas raising SettingWithCopyWarning.
data_carries = data[data['type'] == 'Carry'].copy()

In [6]:
# Boolean per-event flags combining the event type with its outcome.
is_successful = data['outcomeType'].eq('Successful')
is_unsuccessful = data['outcomeType'].eq('Unsuccessful')

data['take_on_won'] = data['type'].eq('TakeOn') & is_successful
data['take_on_lost'] = data['type'].eq('TakeOn') & is_unsuccessful
data['tackle_won'] = data['type'].eq('Tackle') & is_successful
data['tackle_lost'] = data['type'].eq('Tackle') & is_unsuccessful
data['interception'] = data['type'].eq('Interception') & is_successful

In [7]:
# Flag ball recoveries (any outcome).
data['ball_won'] = data['type'].eq('BallRecovery')

In [8]:
# Half-space zones, expressed as rectangles in the coordinates of `data`.
# Both zones share the same x-range (18 to 60) and are 12 units wide in y.
# NOTE(review): the bounds look like they assume a 120x80 pitch — confirm.
left_halfspace_x_min = 18
left_halfspace_x_max = 18 + 42
left_halfspace_y_min = 50
left_halfspace_y_max = 50 + 12

right_halfspace_x_min = 18
right_halfspace_x_max = 18 + 42
right_halfspace_y_min = 18
right_halfspace_y_max = 18 + 12

# Flag events that START inside each half-space. `Series.between` is
# inclusive at both ends by default, so events on a boundary count as inside.
data['in_rhs'] = (
    data['x'].between(right_halfspace_x_min, right_halfspace_x_max)
    & data['y'].between(right_halfspace_y_min, right_halfspace_y_max)
)
data['in_lhs'] = (
    data['x'].between(left_halfspace_x_min, left_halfspace_x_max)
    & data['y'].between(left_halfspace_y_min, left_halfspace_y_max)
)

# Same flags for the carries table. `assign` returns a new frame, so this does
# not trigger SettingWithCopyWarning even if `data_carries` is a filtered
# slice of `data`.
data_carries = data_carries.assign(
    in_rhs=(
        data_carries['x'].between(right_halfspace_x_min, right_halfspace_x_max)
        & data_carries['y'].between(right_halfspace_y_min, right_halfspace_y_max)
    ),
    in_lhs=(
        data_carries['x'].between(left_halfspace_x_min, left_halfspace_x_max)
        & data_carries['y'].between(left_halfspace_y_min, left_halfspace_y_max)
    ),
)

# The 'into_rhs' / 'into_lhs' flags (events ENDING inside a half-space after
# starting outside it) were disabled in this notebook and are not computed.

# Per-zone event flags: the event-level flag AND "started in that zone".
# Ball recoveries and interceptions are matched on event type alone, so
# interceptions are counted regardless of outcomeType (unlike the
# `interception` column, which requires a successful outcome).
for side in ('lhs', 'rhs'):
    in_zone = data[f'in_{side}']
    data[f'take_on_won_{side}'] = data['take_on_won'] & in_zone
    data[f'take_on_lost_{side}'] = data['take_on_lost'] & in_zone
    data[f'ball_won_{side}'] = (data['type'] == 'BallRecovery') & in_zone
    data[f'tackle_won_{side}'] = data['tackle_won'] & in_zone
    data[f'tackle_lost_{side}'] = data['tackle_lost'] & in_zone
    data[f'interception_{side}'] = (data['type'] == 'Interception') & in_zone


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_carries['in_rhs'] = (data_carries['x'].between(right_halfspace_x_min, right_halfspace_x_max) &
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_carries['in_lhs'] = (data_carries['x'].between(left_halfspace_x_min, left_halfspace_x_max) &


In [11]:
# Per-player totals of the left half-space flags. Summing a boolean column
# counts its True values. Rows with a missing grouping key are dropped by
# groupby's default dropna=True.
lhs_count_cols = [
    'in_lhs',
    'take_on_won_lhs',
    'take_on_lost_lhs',
    'ball_won_lhs',
    'tackle_won_lhs',
    'tackle_lost_lhs',
    'interception_lhs',
]
actions_lhs = (
    data
    .groupby(['playerId', 'playerName', 'team_name'])[lhs_count_cols]
    .sum()
    .reset_index()
)

# Success rates in percent; NaN for players with no attempts (0 / 0).
actions_lhs['take_on_%_lhs'] = (
    actions_lhs['take_on_won_lhs']
    .div(actions_lhs['take_on_won_lhs'] + actions_lhs['take_on_lost_lhs'])
    .mul(100)
)
actions_lhs['tackle_%_lhs'] = (
    actions_lhs['tackle_won_lhs']
    .div(actions_lhs['tackle_won_lhs'] + actions_lhs['tackle_lost_lhs'])
    .mul(100)
)


In [29]:
# Per-player totals of the right half-space flags. Summing a boolean column
# counts its True values. Rows with a missing grouping key are dropped by
# groupby's default dropna=True.
rhs_count_cols = [
    'in_rhs',
    'take_on_won_rhs',
    'take_on_lost_rhs',
    'ball_won_rhs',
    'tackle_won_rhs',
    'tackle_lost_rhs',
    'interception_rhs',
]
actions_rhs = (
    data
    .groupby(['playerId', 'playerName', 'team_name'])[rhs_count_cols]
    .sum()
    .reset_index()
)

# Success rates in percent; NaN for players with no attempts (0 / 0).
actions_rhs['take_on_%_rhs'] = (
    actions_rhs['take_on_won_rhs']
    .div(actions_rhs['take_on_won_rhs'] + actions_rhs['take_on_lost_rhs'])
    .mul(100)
)
actions_rhs['tackle_%_rhs'] = (
    actions_rhs['tackle_won_rhs']
    .div(actions_rhs['tackle_won_rhs'] + actions_rhs['tackle_lost_rhs'])
    .mul(100)
)


In [30]:
# Display the per-player left half-space table.
actions_lhs

Unnamed: 0,playerId,playerName,team_name,in_lhs,take_on_won_lhs,take_on_lost_lhs,ball_won_lhs,tackle_won_lhs,tackle_lost_lhs,interception_lhs,take_on_%_lhs,tackle_%_lhs
0,4511.0,James Milner,Liverpool,111,0,0,4,3,0,1,,100.0
1,5583.0,Cristiano Ronaldo,Man Utd,20,0,0,0,0,0,0,,
2,8166.0,Ashley Young,Aston Villa,38,0,0,6,0,0,3,,
3,13796.0,Theo Walcott,Southampton,4,0,0,0,0,0,0,,
4,16161.0,Joao Moutinho,Wolves,203,0,1,14,1,4,3,0.0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...
564,470690.0,Yasin Ayari,Brighton,8,0,0,0,0,0,0,,
565,474264.0,Lewis Miley,Newcastle,0,0,0,0,0,0,0,,
566,475756.0,Kamari Doyle,Southampton,0,0,0,0,0,0,0,,
567,479809.0,George Abbott,Tottenham,0,0,0,0,0,0,0,,


In [31]:
# Columns shown in the half-space leaderboards: player identity followed by
# the same four metrics, suffixed with the zone they refer to.
cols_lhs = ['playerName', 'team_name'] + [
    f'{metric}_lhs'
    for metric in ('tackle_won', 'tackle_%', 'interception', 'ball_won')
]
cols_rhs = ['playerName', 'team_name'] + [
    f'{metric}_rhs'
    for metric in ('tackle_won', 'tackle_%', 'interception', 'ball_won')
]

In [32]:
# Keep only the leaderboard columns for each zone.
lhs_actions_disp = actions_lhs.loc[:, cols_lhs]
rhs_actions_disp = actions_rhs.loc[:, cols_rhs]

In [33]:
# Rank players by ball recoveries in the left half-space and show the top 10.
lhs_actions_disp = lhs_actions_disp.sort_values('ball_won_lhs' , ascending=False)
lhs_actions_disp.head(10)
# Optional single-team view:
#lhs_actions_disp[lhs_actions_disp['team_name'] == 'Man City'].sort_values('ball_won_lhs' , ascending=False)

Unnamed: 0,playerName,team_name,tackle_won_lhs,tackle_%_lhs,interception_lhs,ball_won_lhs
453,Marc Guéhi,Crystal Palace,3,42.857143,18,57
355,Gabriel Magalhães,Arsenal,2,66.666667,7,51
349,Declan Rice,West Ham,9,81.818182,14,49
83,Lewis Dunk,Brighton,2,50.0,2,48
284,Wout Faes,Leicester,6,60.0,9,40
80,Tim Ream,Fulham,2,40.0,14,40
304,Antonee Robinson,Fulham,1,50.0,4,38
110,Ben Mee,Brentford,7,70.0,8,37
454,Cheick Doucouré,Crystal Palace,8,66.666667,11,36
190,James Tarkowski,Everton,1,25.0,3,36


In [34]:
# Rank players by ball recoveries in the right half-space and show the top 10.
rhs_actions_disp = rhs_actions_disp.sort_values('ball_won_rhs' , ascending=False)
rhs_actions_disp.head(10)
# Optional single-team view:
#rhs_actions_disp[rhs_actions_disp['team_name'] == 'Man City'].sort_values('ball_won_rhs' , ascending=False)

Unnamed: 0,playerName,team_name,tackle_won_rhs,tackle_%_rhs,interception_rhs,ball_won_rhs
74,Kieran Trippier,Newcastle,2,50.0,4,47
71,Fabian SchÃ¤r,Newcastle,4,33.333333,14,47
411,William Saliba,Arsenal,3,50.0,4,46
300,Rodri,Man City,3,42.857143,3,41
238,Ruben Neves,Wolves,7,70.0,9,40
454,Cheick Doucouré,Crystal Palace,7,38.888889,9,39
258,Robin Koch,Leeds,5,50.0,8,37
67,Idrissa Gueye,Everton,6,60.0,7,35
289,Ezri Konsa,Aston Villa,1,100.0,6,34
361,Bruno Guimarães,Newcastle,3,42.857143,7,33


In [20]:
#carries_hs = data_carries.groupby(['playerId', 'playerName', 'team_name']).agg(
 #   in_lhs=('in_lhs', 'sum'),
  #  into_lhs=('into_lhs', 'sum'), 
   # in_rhs=('in_rhs', 'sum'),
    #into_rhs=('into_rhs', 'sum')
#).reset_index()

In [21]:
#carries_hs

In [22]:
#lhs_actions_disp = actions_z14[cols1]
#rhs_actions_disp = actions_z14[cols3]

In [23]:
#lhs_actions_disp = lhs_actions_disp.sort_values('ball_won_lhs' , ascending=False)
#lhs_actions_disp[lhs_actions_disp['team_name'] == 'Man City'].sort_values('ball_won_lhs' , ascending=False)

In [24]:
#rhs_actions_disp = rhs_actions_disp.sort_values('ball_won_rhs' , ascending=False) 
#rhs_actions_disp[rhs_actions_disp['team_name'] == 'Liverpool'].sort_values('ball_won_rhs' , ascending=False)

In [25]:
#rhs_carries = carries_hs[cols5]
#rhs_carries.sort_values('into_rhs' , ascending=False)

In [26]:
#lhs_carries = carries_hs[cols2]
#lhs_carries.sort_values('into_lhs' , ascending=False)