In [None]:
# Optional one-time setup: uncomment to install mplsoccer, the package the Pitch class is imported from below.
#!pip install mplsoccer

## Ejemplo de EDA - Análisis Táctico de Posición de Jugadores

En este caso se nos pide realizar un Análisis táctico del juego de ambos equipos y clasificar a los jugadores en base a su posición en el campo en una línea táctica del campo:

    + Línea Defensiva
    + Línea de Mediocampo
    + Línea de ataque
    
Se nos proporcionan datos de tracking de 14 jugadores, árbitro y balón cada 10 ms (los elementos que aparecen normalmente en el campo visual de la cámara)

    - Posición x,y en el campo
    - id del jugador capturado
    - id de la jugada por si se quiere hacer un análisis continuado de la jugada
    - periodo de juego
    - las medidas del campo son 105 x 68 y el punto (0,0) se encuentra en el centro del campo
    

In [None]:
import pandas as pd
from tqdm.notebook import tqdm
import warnings
import json
from mplsoccer.pitch import Pitch
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
warnings.filterwarnings('ignore')

In [None]:
# Tracking file: loaded below as a list of frames, each with 'data', 'time' and 'period'.
file = 'data/structured_data.json'
# Match metadata file: loaded below; the 'players' and 'home_team' entries are used.
file2 = 'data/match_data.json'

In [None]:
# Load both JSON documents. The encoding is stated explicitly so any non-ASCII
# text is decoded the same way on every platform instead of depending on the
# locale default.
with open(file, encoding='utf-8') as f:
    match_js = json.load(f)

with open(file2, encoding='utf-8') as f2:
    match_data_js = json.load(f2)

In [None]:
# Peek at the first tracking frame to see its structure.
match_js[0]

In [None]:
# Show the index and content of the first frame whose 'data' list is not empty,
# then stop: one example is enough to learn the layout of a tracked object.
for i, frame in enumerate(match_js):
    if not frame['data']:
        continue
    print(i)
    print(frame['data'])
    break

In [None]:
# Peek at the first player entry of the match metadata.
match_data_js['players'][0]

Vamos a tratar de extraer la información que necesitamos de cada uno de los archivos para poder montar una tabla con la que nos sea más fácil trabajar

In [None]:
# Flatten the nested tracking JSON into one record per identified object per frame.
events = []
for frame in tqdm(match_js):
    for obj in frame['data']:
        try:
            track_object = obj['trackable_object']
            # Objects without an identifier are not useful for the analysis.
            if not track_object:
                continue
            events.append({
                # 1-based running number of the record. The previous counter was
                # never incremented, so every row ended up with event_id == 1.
                'event_id': len(events) + 1,
                'pos_x': obj['x'],
                'pos_y': obj['y'],
                'time': frame['time'],
                'period': frame['period'],
                'track_object': track_object,
                'track_id': obj['track_id'],
            })
        except (KeyError, TypeError):
            # Best effort: skip entries that lack one of the expected fields,
            # but let any other kind of error surface instead of hiding it.
            continue

In [None]:
# Working table: one row per extracted tracking record.
df = pd.DataFrame(events)

df.head()

In [None]:
# Column dtypes and non-null counts of the tracking table.
df.info()

Como nos interesa saber la posición del jugador durante todo el partido vamos a crear una nueva columna con su posición absoluta durante todo el encuentro, ya que a mitad del partido los equipos cambian de posición en el terreno de juego

In [None]:
# Split the records by half. .copy() yields independent frames, so adding
# columns does not write into a slice of df (no SettingWithCopyWarning).
# Rows whose period is neither 1 nor 2 are left out.
p1 = df[df.period == 1].copy()
p2 = df[df.period == 2].copy()

# The first half keeps its coordinates as the reference orientation.
p1['pos_x_abs'] = p1.pos_x
p1['pos_y_abs'] = p1.pos_y

# Teams swap ends at half-time: negate both axes (the origin is the centre of
# the pitch) so second-half positions are expressed in first-half orientation.
p2['pos_x_abs'] = -p2.pos_x
p2['pos_y_abs'] = -p2.pos_y

# axis is passed by keyword: pandas 2.x no longer accepts it positionally.
all_p = pd.concat([p1, p2], axis=0)

Ahora vamos a hacer lo mismo con la tabla de info de jugadores, genero una tabla y la filtro con las columnas que me interesan

In [None]:
# Raw player table straight from the match metadata, to see which columns are available.
players = pd.DataFrame(match_data_js['players'])
players.head()

In [None]:
# Keep only the identifying columns of every player ...
players = pd.DataFrame(match_data_js['players'])[['id','trackable_object','first_name','last_name','team_id']]

# ... and add the role (name and id) stored in the nested 'player_role' entry,
# plus a Home/Away flag obtained by comparing the team id with the home team id.
pos = []
id_pos = []
home_away = []
for i in range(len(players)):
    role = match_data_js['players'][i]['player_role']
    pos.append(role['name'])
    id_pos.append(role['id'])
    if match_data_js['home_team']['id'] == players['team_id'][i]:
        home_away.append('Home')
    else:
        home_away.append('Away')

players['Position'] = pos
players['id_pos'] = id_pos
players['Home_Away'] = home_away

In [None]:
# Check the enriched player table (Position, id_pos and Home_Away were added above).
players.head()

Ahora unimos los dos dataframes para tener una tabla final con la que poder trabajar

In [None]:
# Left join: every tracking row is kept; rows whose track_object has no match
# in players get NaN in the player columns.
final = all_p.merge(players, how='left', left_on='track_object', right_on='trackable_object' )

Y divido la tabla entre el equipo de casa y el visitante para poder tratar con los datos de forma separada

In [None]:
# One table per side, each re-indexed from 0.
final_home = final.loc[final['Home_Away'] == 'Home'].reset_index(drop=True)
final_away = final.loc[final['Home_Away'] == 'Away'].reset_index(drop=True)

Con la intención de poder extrapolar nuestro análisis a otros partidos vamos a unificar el código en una función

## Análisis por Equipo

In [None]:
def generate_teams_match_data(file:str, file2:str)->(pd.DataFrame, pd.DataFrame):
    '''
        Build one tracking table per team from a tracking file and a match file.

        Every object of every frame that carries a truthy 'trackable_object'
        becomes a row. Second-half coordinates are negated (pos_x_abs, pos_y_abs)
        so both halves share the first-half orientation, and every row is joined
        with the data of the matching player. Rows whose period is neither 1
        nor 2 are dropped.

        Parameters:
            file: path of the JSON tracking file (a list of frames, each with
                  'data', 'time' and 'period')
            file2: path of the JSON match file (with 'players' and 'home_team')

        Returns:

            Tuple (final_home, final_away) with the rows of the home team and
            the rows of the away team, each indexed from 0

    '''

    # Explicit encoding: do not depend on the locale default for non-ASCII text.
    with open(file, encoding='utf-8') as f:
        match_js = json.load(f)

    with open(file2, encoding='utf-8') as f2:
        match_data_js = json.load(f2)

    events = []
    for frame in tqdm(match_js):
        for obj in frame['data']:
            try:
                track_object = obj['trackable_object']
                if not track_object:
                    continue
                events.append({
                    # 1-based running number of the record (the previous
                    # counter was never incremented and was always 1).
                    'event_id': len(events) + 1,
                    'pos_x': obj['x'],
                    'pos_y': obj['y'],
                    'time': frame['time'],
                    'period': frame['period'],
                    'track_object': track_object,
                    'track_id': obj['track_id'],
                })
            except (KeyError, TypeError):
                # Best effort: skip entries lacking an expected field, but do
                # not hide unrelated errors.
                continue

    match = pd.DataFrame(events)

    # .copy() so the new columns are not written into slices of match.
    p1 = match[match.period == 1].copy()
    p2 = match[match.period == 2].copy()
    p1['pos_x_abs'] = p1.pos_x
    p1['pos_y_abs'] = p1.pos_y
    # Teams swap ends at half-time: mirror the second half through the origin.
    p2['pos_x_abs'] = -p2.pos_x
    p2['pos_y_abs'] = -p2.pos_y
    # axis by keyword: pandas 2.x no longer accepts it positionally.
    all_p = pd.concat([p1, p2], axis=0)

    player_records = match_data_js['players']
    home_team_id = match_data_js['home_team']['id']
    players = pd.DataFrame(player_records)[['id','trackable_object','first_name','last_name','team_id']]
    players['Position'] = [rec['player_role']['name'] for rec in player_records]
    players['id_pos'] = [rec['player_role']['id'] for rec in player_records]
    players['Home_Away'] = ['Home' if team_id == home_team_id else 'Away' for team_id in players['team_id']]

    # Left join: tracked objects that are not players keep NaN player columns
    # and are therefore excluded from both team tables below.
    final = all_p.merge(players, how='left', left_on='track_object', right_on='trackable_object')

    final_home = final[final.Home_Away == 'Home'].reset_index(drop=True)
    final_away = final[final.Home_Away == 'Away'].reset_index(drop=True)

    return final_home, final_away
    

In [None]:
# Build the home (f_h) and away (f_a) tables with the function defined above.
f_h, f_a = generate_teams_match_data('data/structured_data.json','data/match_data.json' )

In [None]:
# Sanity check: the function output should have the same shape as the table built step by step.
final_home.shape == f_h.shape

Con los datos ya generados podemos empezar nuestro análisis de los equipos

In [None]:
# Distribution of the x positions (first-half orientation) of the home team,
# with the two limits at x = -12 and x = 12 that separate the tactical lines.
plt.figure(figsize=(12,6))
sns.histplot(f_h.pos_x_abs)
plt.title(f'{f_h.Home_Away[0]} X Position')
plt.axvline(-12,
            c='r',
            linewidth=3.,
            linestyle='--',
            label='Defending Line Limit')
plt.axvline(12,
            c='b',
            linewidth=3.,
            linestyle='--',
            label='Atacking Line Limit')
plt.ylabel(None)
plt.xlabel(None)
# The labels given to axvline are only rendered when a legend is drawn.
plt.legend()
plt.show();

In [None]:
# Same histogram for the away team; the limits are swapped with respect to the
# home plot (here the defending limit is at x = 12 and the attacking one at x = -12).
plt.figure(figsize=(12,6))
sns.histplot(f_a.pos_x_abs)
plt.title(f'{f_a.Home_Away[0]} X Position')
plt.axvline(12,
            c='r',
            linewidth=3.,
            linestyle='--',
            label='Defending Line Limit')
plt.axvline(-12,
            c='b',
            linewidth=3.,
            linestyle='--',
            label='Atacking Line Limit')
plt.ylabel(None)
plt.xlabel(None)
# The labels given to axvline are only rendered when a legend is drawn.
plt.legend()
plt.show();

In [None]:
def plot_team_hist(df:pd.DataFrame)->None:

    '''
        Draw a histogram of the x positions (pos_x_abs) of all the players of a team.

        Parameters:
            df: Pandas dataframe with the team data; it must contain the
                columns pos_x_abs and Home_Away

        Returns:

            None, the histogram of pos_x_abs is drawn and shown

    '''

    plt.figure(figsize=(12,6))
    # Plot the dataframe received as argument (the global f_h was plotted
    # before, so every team produced the home histogram).
    sns.histplot(df.pos_x_abs)
    # .iloc[0] takes the first row whatever the index labels are.
    plt.title(f'{df.Home_Away.iloc[0]} X Position')
    plt.ylabel(None)
    plt.xlabel(None)
    plt.show();

Como podemos observar a simple vista, el equipo local estuvo más tiempo en zona defensiva y el equipo visitante estuvo más en la zona del medio campo y ataque. En este punto vamos a utilizar una librería específica para poder pintar mapas de calor en el fútbol

In [None]:
def _classify_tactical_line(x:float, side:str, limit:float=12)->str:
    '''
        Return the tactical line that corresponds to a median x position.

        Positions between -limit and limit (both included) are midfield.
        Beyond the limits the meaning depends on the side: for 'Away' the
        positive end is the defending one, for any other value it is the
        attacking one.
    '''
    # Inclusive bounds: a median of exactly +/-limit used to fall through to
    # the final branch and was labelled as an extreme line.
    if -limit <= x <= limit:
        return 'Midfield Line'
    beyond_positive_limit = x > limit
    if side == 'Away':
        return 'Defending Line' if beyond_positive_limit else 'Attacking Line'
    return 'Attacking Line' if beyond_positive_limit else 'Defending Line'


def plot_heatmap_team(df:pd.DataFrame, players:str)->None:

    '''
        Draw the heat map of a team and annotate the median position of its players.

        Parameters:
            df: Pandas dataframe with the team data (columns period,
                trackable_object, id_pos, last_name, pos_x_abs, pos_y_abs
                and Home_Away)
            players: 'start' annotates the players tracked in both periods;
                     any other value annotates the players tracked in
                     period 2 but not in period 1

        Returns:

            None, the figure is saved under images/ and shown; the line
            assigned to every player is printed

    '''

    print('Calculando posición de los jugadores en el terreno de juego ...\n')

    side = df.Home_Away.unique()[0]

    # Computed once instead of once per element of the comprehensions.
    first_half_ids = df[df.period == 1].trackable_object.unique()
    second_half_ids = df[df.period == 2].sort_values('id_pos').trackable_object.unique()
    start_p = [n for n in second_half_ids if n in first_half_ids]
    subs = [n for n in second_half_ids if n not in first_half_ids]

    show_starters = players == 'start'
    selected = start_p if show_starters else subs

    with plt.xkcd():
        matplotlib.rcParams['font.family'] = ['CENTAUR']
        pitch = Pitch(pitch_type='skillcorner', pitch_length= 105, pitch_width=68, pitch_color='limegreen',
                  line_color='white', stripe=True, line_zorder=3)

        fig, ax = pitch.draw(figsize=(12,6))

        custcmap = matplotlib.colors.LinearSegmentedColormap.from_list('custom', ['limegreen','green','yellow','gold','orange','orangered','red'])
        # fill / thresh=0 are the current spellings of the deprecated
        # shade=True / shade_lowest=True arguments of seaborn's kdeplot.
        kde = pitch.kdeplot(df.pos_x_abs, df.pos_y_abs, fill=True, ax=ax,
                       levels=100, thresh=0, cut=4, cmap=custcmap, zorder=1)

        print('Creando Mapa de calor ...\n')

        line_counts = {'Defending Line': 0, 'Midfield Line': 0, 'Attacking Line': 0}
        for p in tqdm(selected):
            player_rows = df[df.trackable_object == p]
            name = player_rows['last_name'].values[0]
            x = round(player_rows.pos_x_abs.median(),2)
            y = round(player_rows.pos_y_abs.median(),2)

            pl_clas = _classify_tactical_line(x, side)
            line_counts[pl_clas] += 1

            if show_starters:
                print(f'{name} : {pl_clas} --> ({x},{y})')
            else:
                print(f'{name} : {pl_clas} --> (x:{x}, y:{y})')
            plt.annotate(f'+ {name}',(x,y), fontsize=20, zorder=4)

        plt.plot([-12,-12],[-35,35] ,color='r', zorder=3)
        if show_starters:
            plt.plot([12,12],[-35,35] ,color='r', zorder=3)
            # One is subtracted from the defending count and shown as the
            # leading "1-" of the formation.
            def_line = line_counts['Defending Line']
            mid_line = line_counts['Midfield Line']
            atk_line = line_counts['Attacking Line']
            plt.title(f'{side} Team Initial Squad - Tactic 1-{def_line-1}-{mid_line}-{atk_line}', fontsize=25)
            image_path = f'images/{side}_Team.png'
        else:
            plt.plot([12,12],[-35,35] ,color='b', zorder=3)
            plt.title(f'{side} Team Subtitutions', fontsize=25)
            image_path = f'images/{side}_Team_Subtitutions.png'

        print('\nGuardando imagen ...\n')
        plt.savefig(image_path)
        plt.show();

Vamos a imprimir el mapa de calor del once inicial del equipo local

#### Equipo Local

In [None]:
# Heat map of the home team with its starting players annotated.
plot_heatmap_team(f_h, 'start')

Ahora el del visitante

#### Equipo Visitante

In [None]:
# Heat map of the away team with its starting players annotated.
plot_heatmap_team(f_a, 'start')

## Análisis Jugador por Jugador

Con unos pequeños cambios podemos adaptar las funciones que creamos antes para analizar a un jugador específico

In [None]:
def generate_player_match_data(file:str, file2:str, id_player:str)->pd.DataFrame:

    '''
        Build the tracking table of a single player.

        Only the objects whose 'trackable_object' equals id_player are kept.
        Second-half coordinates are negated (pos_x_abs, pos_y_abs) so both
        halves share the first-half orientation, and every row is joined with
        the data of the player. Rows whose period is neither 1 nor 2 are dropped.

        Parameters:
            file: path of the JSON tracking file (a list of frames, each with
                  'data', 'time' and 'period')
            file2: path of the JSON match file (with 'players' and 'home_team')
            id_player: value of 'trackable_object' that identifies the player

        Returns:

            Pandas dataframe with one row per tracked position of the player

    '''

    # Explicit encoding: do not depend on the locale default for non-ASCII text.
    with open(file, encoding='utf-8') as f:
        match_js = json.load(f)

    with open(file2, encoding='utf-8') as f2:
        match_data_js = json.load(f2)

    events = []
    for frame in tqdm(match_js):
        for obj in frame['data']:
            try:
                if obj['trackable_object'] != id_player:
                    continue
                events.append({
                    # 1-based running number of the record (the previous
                    # counter was never incremented and was always 1).
                    'event_id': len(events) + 1,
                    'pos_x': obj['x'],
                    'pos_y': obj['y'],
                    'time': frame['time'],
                    'period': frame['period'],
                    'track_object': id_player,
                })
            except (KeyError, TypeError):
                # Best effort: skip entries lacking an expected field, but do
                # not hide unrelated errors.
                continue

    match = pd.DataFrame(events)

    # .copy() so the new columns are not written into slices of match.
    p1 = match[match.period == 1].copy()
    p2 = match[match.period == 2].copy()
    p1['pos_x_abs'] = p1.pos_x
    p1['pos_y_abs'] = p1.pos_y
    # Teams swap ends at half-time: mirror the second half through the origin.
    p2['pos_x_abs'] = -p2.pos_x
    p2['pos_y_abs'] = -p2.pos_y
    # axis by keyword: pandas 2.x no longer accepts it positionally.
    all_p = pd.concat([p1, p2], axis=0)

    player_records = match_data_js['players']
    home_team_id = match_data_js['home_team']['id']
    players = pd.DataFrame(player_records)[['id','trackable_object','first_name','last_name','team_id']]
    players['Position'] = [rec['player_role']['name'] for rec in player_records]
    players['id_pos'] = [rec['player_role']['id'] for rec in player_records]
    players['Home_Away'] = ['Home' if team_id == home_team_id else 'Away' for team_id in players['team_id']]

    final = all_p.merge(players, how='left', left_on='track_object', right_on='trackable_object')

    return final

In [None]:
def plot_player_heatmap(df:pd.DataFrame)->None:

    '''
        Draw the heat map of one player and classify him in a tactical line.

        The line is decided from the median of pos_x_abs: between -12 and 12
        (both included) is midfield; beyond those limits the positive end is
        the defending one for the 'Away' side and the attacking one otherwise.

        Parameters:
            df: Pandas dataframe with the rows of a single player (columns
                first_name, last_name, pos_x_abs, pos_y_abs and Home_Away)

        Returns:

            None, the figure is saved as images/<Home_Away>/<last_name>_heatmap.png
            and shown; the assigned line is printed

    '''

    # .iloc[0] takes the first row whatever the index labels are.
    name = df['last_name'].iloc[0]
    first_name = df['first_name'].iloc[0]
    side = df.Home_Away.unique()[0]
    x = round(df.pos_x_abs.median(),2)
    y = round(df.pos_y_abs.median(),2)

    # Inclusive bounds: a median of exactly +/-12 used to fall through to the
    # final branch and was labelled as an extreme line.
    if -12 <= x <= 12:
        pl_clas = 'Midfield Line'
    else:
        in_positive_end = x > 12
        defends_positive_end = side == 'Away'
        if in_positive_end == defends_positive_end:
            pl_clas = 'Defending Line'
        else:
            pl_clas = 'Attacking Line'

    print('Calculando posiciónes del jugadore en el terreno de juego ...\n')
    print('Creando Mapa de calor ...\n')

    with plt.xkcd():
        matplotlib.rcParams['font.family'] = ['CENTAUR']
        pitch = Pitch(pitch_type='skillcorner', pitch_length= 105, pitch_width=68, pitch_color='limegreen',
                  line_color='white', stripe=False, line_zorder=3)
        fig, ax = pitch.draw(figsize=(12,6))

        custcmap = matplotlib.colors.LinearSegmentedColormap.from_list('custom', ['limegreen','green','yellow', 'gold','orange','orangered','red'])
        # thresh=0 is the current spelling of the deprecated shade_lowest=True
        # argument of seaborn's kdeplot.
        kde = pitch.kdeplot(df.pos_x_abs, df.pos_y_abs, fill=True, ax=ax,
                       levels=100, thresh=0, cut=4, cmap=custcmap, zorder=1)

        plt.title(f'{first_name} {name} - {pl_clas}', fontsize=25)
        plt.annotate(f'+ {name}',(x,y), fontsize=20, zorder=4)
        plt.plot([-12,-12],[-35,35] ,color='r', zorder=3)
        plt.plot([12,12],[-35,35] ,color='r', zorder=3)

        print(f'{name} : {pl_clas} --> ({x},{y})')
        print('\nGuardando imagen ...\n')
        plt.savefig(f'images/{side}/{name}_heatmap.png')

        plt.show();

Con estas dos funciones podemos generar mapas de calor de cada uno de los jugadores de ambos equipos para clasificarlos individualmente en la línea táctica

In [None]:
# Players of each team tracked in period 2 (ordered by id_pos), split into
# those also tracked in period 1 (starters) and those who were not (substitutes).
first_half_h = f_h[f_h.period == 1].trackable_object.unique()
second_half_h = f_h[f_h.period == 2].sort_values('id_pos').trackable_object.unique()
start_p_h = [n for n in second_half_h if n in first_half_h]
subs_h = [n for n in second_half_h if n not in first_half_h]

first_half_a = f_a[f_a.period == 1].trackable_object.unique()
second_half_a = f_a[f_a.period == 2].sort_values('id_pos').trackable_object.unique()
start_p_a = [n for n in second_half_a if n in first_half_a]
subs_a = [n for n in second_half_a if n not in first_half_a]

#### Análisis del equipo titular local

In [None]:
# One heat map per home starter; the player table is rebuilt from the files each time.
for p in start_p_h:
    pl = generate_player_match_data(
        'data/structured_data.json',
        'data/match_data.json',
        p,
    )
    plot_player_heatmap(pl)

#### Análisis de jugadores que entraron de recambio en el equipo local

In [None]:
# One heat map per home substitute; the player table is rebuilt from the files each time.
for p in subs_h:
    pl = generate_player_match_data(
        'data/structured_data.json',
        'data/match_data.json',
        p,
    )
    plot_player_heatmap(pl)

#### Análisis del equipo titular visitante

In [None]:
# One heat map per away starter; the player table is rebuilt from the files each time.
for p in start_p_a:
    pl = generate_player_match_data(
        'data/structured_data.json',
        'data/match_data.json',
        p,
    )
    plot_player_heatmap(pl)

#### Análisis de jugadores que entraron de recambio en el equipo visitante

In [None]:
# One heat map per away substitute; the player table is rebuilt from the files each time.
for p in subs_a:
    pl = generate_player_match_data(
        'data/structured_data.json',
        'data/match_data.json',
        p,
    )
    plot_player_heatmap(pl)