In [250]:
from mplsoccer.pitch import Pitch, VerticalPitch
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from highlight_text import HighlightText, ax_text, fig_text
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
from scipy.ndimage import gaussian_filter
import matplotlib.patheffects as path_effects

In [251]:
data = pd.read_csv("Ligue_1_22_23.csv" , low_memory=False)
data['x'] = data['x']*1.2
data['y'] = data['y']*.8
data['endX'] = data['endX']*1.2
data['endY'] = data['endY']*.8

In [252]:
def add_carries(game_df):
   min_dribble_length: float = 0.0
   max_dribble_length: float = 100.0
   max_dribble_duration: float = 20.0
   game_df['time_seconds']=game_df['minute']*60+game_df['second']
   next_actions = data.shift(-1)
   same_game=game_df.matchId==next_actions.matchId
   same_team = game_df.teamId == next_actions.teamId
   dx = game_df.endX - next_actions.x
   dy = game_df.endY- next_actions.y
   far_enough=dy** 2 + dy** 2 >= min_dribble_length** 2
   not_too_far = dx ** 2+ dy** 2<= max_dribble_length ** 2
   dt = next_actions.time_seconds - game_df.time_seconds
   same_phase = dt < max_dribble_duration
   same_period = game_df.period == next_actions.period
   dribble_idx = same_team & far_enough & not_too_far & same_phase & same_period&same_game
   dribbles = pd.DataFrame()
   prev = game_df[dribble_idx]
   nex= next_actions[dribble_idx]
   dribbles['matchId' ] = nex.matchId
   dribbles['period' ] = nex.period
   for cols in ['expandedMinute']:
      dribbles[cols] = nex[cols]
   for cols in ['passKey','assist']:
      dribbles[ cols] = [True for _ in range (len (dribbles))]
   dribbles['isTouch'] = [True for _ in range (len (dribbles))]
   morecols = ['playerId', 'team_name','playerName','h_a']
   for cols in morecols:
      dribbles[cols] = nex[cols]
   dribbles['action_id'] = prev.eventId + 0.1
   dribbles['time_seconds' ] = (prev.time_seconds + nex.time_seconds) / 2
   dribbles['teamId'] = nex.teamId
   dribbles['playerId']= nex.playerId
   dribbles['playerName' ]= nex.playerName
   dribbles['x'] = prev.endX
   dribbles['y']= prev.endY
   dribbles['endX'] = nex.x
   dribbles['endY'] = nex.y
   dribbles['type'] = ['Carry' for _ in range(len(dribbles))]
   dribbles['outcomeType'] = ['Successful' for _ in range(len(dribbles))]
   dribbles['qualifiers']=[{} for _ in range(len(dribbles))]
   game_df=pd.concat([game_df,dribbles], ignore_index=True,sort=False)
   game_df=game_df.sort_values(['matchId','period','action_id']).reset_index(drop=True)
   game_df['action_id']=range(len(game_df))
   return game_df

In [253]:
data = add_carries(data)

In [254]:
data_carries = data[data['type'] == 'Carry']

In [255]:
data['take_on_won'] = (data['type'] == 'TakeOn') & (data['outcomeType'] == 'Successful')
data['take_on_lost'] = (data['type'] == 'TakeOn') & (data['outcomeType'] == 'Unsuccessful')
data['tackle_won'] = (data['type'] == 'Tackle') & (data['outcomeType'] == 'Successful')
data['tackle_lost'] = (data['type'] == 'Tackle') & (data['outcomeType'] == 'Unsuccessful')
data['interception'] = (data['type'] == 'Interception') & (data['outcomeType'] == 'Successful')

In [256]:
data['ball_won'] = (data['type'] == 'BallRecovery')

In [257]:
left_halfspace_x_min = 18
left_halfspace_x_max = 18 + 42
left_halfspace_y_min = 50
left_halfspace_y_max = 50 + 12
    
    
right_halfspace_x_min = 18
right_halfspace_x_max = 18 + 42
right_halfspace_y_min = 18
right_halfspace_y_max = 18 + 12


# Add 'in_rhs' column to data_succ_prog
data['in_rhs'] = (data['x'].between(right_halfspace_x_min, right_halfspace_x_max) & 
                       data['y'].between(right_halfspace_y_min, right_halfspace_y_max))

# Add 'in_lhs' column to data_succ_prog
data['in_lhs'] = (data['x'].between(left_halfspace_x_min, left_halfspace_x_max) & 
                       data['y'].between(left_halfspace_y_min, left_halfspace_y_max))

# Add 'into_rhs' column to data_succ_prog
#data['into_rhs'] = (data['endX'].between(right_halfspace_x_min, right_halfspace_x_max) & 
 #                        data['endY'].between(right_halfspace_y_min, right_halfspace_y_max) & 
  #                       (data['in_rhs'] == False))

# Add 'into_lhs' column to data_succ_prog
#data['into_lhs'] = (data['endX'].between(left_halfspace_x_min, left_halfspace_x_max) & 
 #                        data['endY'].between(left_halfspace_y_min, left_halfspace_y_max) & 
  #                       (data['in_lhs'] == False))

# Add 'in_rhs' column to data_carries_succ_prog
data_carries['in_rhs'] = (data_carries['x'].between(right_halfspace_x_min, right_halfspace_x_max) & 
                          data_carries['y'].between(right_halfspace_y_min, right_halfspace_y_max))

# Add 'in_lhs' column to data_carries_succ_prog
data_carries['in_lhs'] = (data_carries['x'].between(left_halfspace_x_min, left_halfspace_x_max) & 
                          data_carries['y'].between(left_halfspace_y_min, left_halfspace_y_max))

# Add 'into_rhs' column to data_carries_succ_prog
#data_carries['into_rhs'] = (data_carries['endX'].between(right_halfspace_x_min, right_halfspace_x_max) & 
 #                           data_carries['endY'].between(right_halfspace_y_min, right_halfspace_y_max) & 
  #                          (data_carries['in_rhs'] == False))

# Add 'into_lhs' column to data_carries_succ_prog
#data_carries['into_lhs'] = (data_carries['endX'].between(left_halfspace_x_min, left_halfspace_x_max) & 
 #                           data_carries['endY'].between(left_halfspace_y_min, left_halfspace_y_max) & 
  #                          (data_carries['in_lhs'] == False))


data['take_on_won_lhs'] = (data['take_on_won'] == True) & (data['in_lhs'] == True)
data['take_on_lost_lhs'] = (data['take_on_lost'] == True) & (data['in_lhs'] == True)
data['ball_won_lhs'] = (data['type'] == 'BallRecovery') & (data['in_lhs'] == True)

data['tackle_won_lhs'] = (data['tackle_won'] == True) & (data['in_lhs'] == True)
data['tackle_lost_lhs'] = (data['tackle_lost'] == True) & (data['in_lhs'] == True)
data['interception_lhs'] = (data['type'] == 'Interception') & (data['in_lhs'] == True)


data['take_on_won_rhs'] = (data['take_on_won'] == True) & (data['in_rhs'] == True)
data['take_on_lost_rhs'] = (data['take_on_lost'] == True) & (data['in_rhs'] == True)
data['ball_won_rhs'] = (data['type'] == 'BallRecovery') & (data['in_rhs'] == True)

data['tackle_won_rhs'] = (data['tackle_won'] == True) & (data['in_rhs'] == True)
data['tackle_lost_rhs'] = (data['tackle_lost'] == True) & (data['in_rhs'] == True)
data['interception_rhs'] = (data['type'] == 'Interception') & (data['in_rhs'] == True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_carries['in_rhs'] = (data_carries['x'].between(right_halfspace_x_min, right_halfspace_x_max) &
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_carries['in_lhs'] = (data_carries['x'].between(left_halfspace_x_min, left_halfspace_x_max) &


In [258]:
actions_lhs = data.groupby(['playerId', 'playerName', 'team_name']).agg(
    in_lhs=('in_lhs', 'sum'),
    #into_lhs=('into_lhs', 'sum'),
    take_on_won_lhs=('take_on_won_lhs', 'sum'),
    take_on_lost_lhs=('take_on_lost_lhs', 'sum'),
    ball_won_lhs=('ball_won_lhs', 'sum'),
    tackle_won_lhs=('tackle_won_lhs', 'sum'),
    tackle_lost_lhs=('tackle_lost_lhs', 'sum'),
    interception_lhs=('interception_lhs', 'sum')
    
).reset_index()


actions_lhs['take_on_%_lhs'] = (actions_lhs['take_on_won_lhs'] / (actions_lhs['take_on_won_lhs'] + actions_lhs['take_on_lost_lhs'])) * 100
actions_lhs['tackle_%_lhs'] = (actions_lhs['tackle_won_lhs'] / (actions_lhs['tackle_won_lhs'] + actions_lhs['tackle_lost_lhs'])) * 100


In [259]:
actions_rhs = data.groupby(['playerId', 'playerName', 'team_name']).agg(
    in_rhs=('in_rhs', 'sum'),
    #into_rhs=('into_lhs', 'sum'),
    take_on_won_rhs=('take_on_won_rhs', 'sum'),
    take_on_lost_rhs=('take_on_lost_rhs', 'sum'),
    ball_won_rhs=('ball_won_rhs', 'sum'),
    tackle_won_rhs=('tackle_won_rhs', 'sum'),
    tackle_lost_rhs=('tackle_lost_rhs', 'sum'),
    interception_rhs=('interception_rhs', 'sum')
    
).reset_index()


actions_rhs['take_on_%_rhs'] = (actions_rhs['take_on_won_rhs'] / (actions_rhs['take_on_won_rhs'] + actions_rhs['take_on_lost_rhs'])) * 100
actions_rhs['tackle_%_rhs'] = (actions_rhs['tackle_won_rhs'] / (actions_rhs['tackle_won_rhs'] + actions_rhs['tackle_lost_rhs'])) * 100


In [260]:
actions_lhs

Unnamed: 0,playerId,playerName,team_name,in_lhs,take_on_won_lhs,take_on_lost_lhs,ball_won_lhs,tackle_won_lhs,tackle_lost_lhs,interception_lhs,take_on_%_lhs,tackle_%_lhs
0,6683.0,Dante,Nice,1661,2,2,57,7,2,12,50.000000,77.777778
1,9909.0,Sergio Ramos,Paris Saint-Germain,482,0,0,18,0,0,1,,
2,11119.0,Lionel Messi,Paris Saint-Germain,83,2,1,0,0,1,0,66.666667,0.000000
3,14058.0,Dimitri Payet,Marseille,74,0,0,8,1,0,0,,100.000000
4,19453.0,Kevin Gameiro,Strasbourg,104,0,1,6,2,0,0,0.000000,100.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
602,481084.0,Fallou Fall,Reims,9,0,0,0,0,0,0,,
603,481174.0,Ben Touré,AC Ajaccio,7,0,0,1,2,0,0,,100.000000
604,482386.0,Amine Messoussa,Lille,0,0,0,0,0,0,0,,
605,482831.0,Mamadou Diakhon,Reims,1,1,0,0,0,0,0,100.000000,


In [261]:
cols_lhs = ['playerName' , 'team_name' , 'tackle_won_lhs' , 'tackle_%_lhs' , 'interception_lhs' , 'ball_won_lhs']
cols_rhs = ['playerName' , 'team_name' , 'tackle_won_rhs' , 'tackle_%_rhs' , 'interception_rhs' , 'ball_won_rhs']

In [262]:
lhs_actions_disp = actions_lhs[cols_lhs]
rhs_actions_disp = actions_rhs[cols_rhs]

In [263]:
lhs_actions_disp = lhs_actions_disp.sort_values('ball_won_lhs' , ascending=False)
lhs_actions_disp.head(20)
#lhs_actions_disp[lhs_actions_disp['team_name'] == 'Liverpool'].sort_values('ball_won_lhs' , ascending=False)

Unnamed: 0,playerName,team_name,tackle_won_lhs,tackle_%_lhs,interception_lhs,ball_won_lhs
0,Dante,Nice,7,77.777778,12,57
419,Lilian Brassier,Brest,1,33.333333,9,49
65,Jordan Ferri,Montpellier,4,100.0,5,49
460,Castello Lukeba,Lyon,2,50.0,15,47
145,Yunis Abdelhamid,Reims,5,55.555556,20,42
67,Jordan Veretout,Marseille,4,66.666667,2,41
433,Gideon Mensah,Auxerre,4,100.0,5,41
331,Montassar Talbi,Lorient,3,42.857143,7,41
334,Youssouf Fofana,Monaco,10,71.428571,6,38
425,Arthur Theate,Rennes,3,100.0,2,36


In [264]:
rhs_actions_disp = rhs_actions_disp.sort_values('ball_won_rhs' , ascending=False)
rhs_actions_disp.head(15)
#rhs_actions_disp[rhs_actions_disp['team_name'] == 'Bournemouth'].sort_values('ball_won_rhs' , ascending=False)

Unnamed: 0,playerName,team_name,tackle_won_rhs,tackle_%_rhs,interception_rhs,ball_won_rhs
234,Axel Disasi,Monaco,0,0.0,13,44
239,Pierre Lees-Melou,Brest,4,50.0,8,43
192,Stijn Spierings,Toulouse,9,90.0,6,43
330,Jean-Clair Todibo,Nice,9,60.0,3,42
434,Emmanuel Agbadou,Reims,2,40.0,5,42
286,Kevin Danso,Lens,5,83.333333,3,39
298,Rominigue Kouamé,Troyes,3,60.0,7,37
132,Jubal,Auxerre,4,50.0,17,35
464,Anthony Rouault,Toulouse,0,0.0,15,35
76,Chancel Mbemba,Marseille,2,50.0,9,34


In [265]:
#carries_hs = data_carries.groupby(['playerId', 'playerName', 'team_name']).agg(
 #   in_lhs=('in_lhs', 'sum'),
  #  into_lhs=('into_lhs', 'sum'), 
   # in_rhs=('in_rhs', 'sum'),
    #into_rhs=('into_rhs', 'sum')
#).reset_index()

In [266]:
#carries_hs

In [267]:
#lhs_actions_disp = actions_z14[cols1]
#rhs_actions_disp = actions_z14[cols3]

In [268]:
#lhs_actions_disp = lhs_actions_disp.sort_values('ball_won_lhs' , ascending=False)
#lhs_actions_disp[lhs_actions_disp['team_name'] == 'Man City'].sort_values('ball_won_lhs' , ascending=False)

In [269]:
#rhs_actions_disp = rhs_actions_disp.sort_values('ball_won_rhs' , ascending=False) 
#rhs_actions_disp[rhs_actions_disp['team_name'] == 'Liverpool'].sort_values('ball_won_rhs' , ascending=False)

In [270]:
#rhs_carries = carries_hs[cols5]
#rhs_carries.sort_values('into_rhs' , ascending=False)

In [271]:
#lhs_carries = carries_hs[cols2]
#lhs_carries.sort_values('into_lhs' , ascending=False)

In [272]:
merged_actions = pd.merge(actions_lhs, actions_rhs, on=['playerName', 'team_name'], suffixes=('_lhs', '_rhs'))
merged_actions['ball_won_hs'] = merged_actions['ball_won_lhs'] + merged_actions['ball_won_rhs']
merged_actions['interception_hs'] = merged_actions['interception_lhs'] + merged_actions['interception_rhs']
merged_actions['tackle_won_hs'] = merged_actions['tackle_won_lhs'] + merged_actions['tackle_won_rhs']
merged_actions['take_on_won_hs'] = merged_actions['take_on_won_lhs'] + merged_actions['take_on_won_rhs']
sorted_actions = merged_actions[['playerName', 'team_name', 'ball_won_hs' , 'tackle_won_hs' , 'interception_hs' , 'take_on_won_hs']].sort_values(by='take_on_won_hs', ascending=False)


In [273]:
sorted_actions.head(20)

Unnamed: 0,playerName,team_name,ball_won_hs,tackle_won_hs,interception_hs,take_on_won_hs
121,Nabil Bentaleb,Angers,55,10,4,11
120,Rafael Ratão,Toulouse,22,3,3,10
186,Seko Fofana,Lens,50,5,5,10
298,Rominigue Kouamé,Troyes,54,7,12,10
512,Ismaël Doukouré,Strasbourg,32,2,5,8
477,Salis Abdul Samed,Lens,45,4,10,8
414,Enzo Le Fée,Lorient,62,17,7,8
55,Marco Verratti,Paris Saint-Germain,34,6,6,8
151,Téji Savanier,Montpellier,38,3,6,8
334,Youssouf Fofana,Monaco,70,11,14,8
