In [1]:
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from src import preprocessing as pre
from src import phases


from kloppy import skillcorner
from mplsoccer import Pitch, VerticalPitch
from pathlib import Path
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap
import re
import numpy as np


In [2]:
# Identifier of the match used throughout this notebook.
match_id = 1886347
# Metadata for that match, loaded through the project's preprocessing module.
match_metadata = pre.load_metadata(match_id)

In [3]:
# Team id read from the metadata's 'home_team' entry.
team_id = match_metadata['home_team']['id']

In [9]:
# Tracking data for the selected team, requested without phase-of-play
# columns (include_phases_of_play=False).
tracking_df = pre.prepare_team_tracking(
    match_id,
    team_id,
    is_home_team=True,
    include_phases_of_play=False,
)

  .apply(pick_gk_for_frame)


In [11]:
# Enrich the tracking data with phase-of-play information for this match/team.
# NOTE(review): this rebinds `tracking_df` to its own enriched version, so
# re-running the cell passes already-enriched data back into the helper --
# confirm the helper tolerates that, or re-run the loading cell first.
tracking_df = pre.add_phases_of_play_info(tracking_df, match_id, team_id)

454 phases of play loaded
454 phases of play frames expanded
157424 tracking frames merged with phases of play
Index(['frame_id', 'timestamp', 'period_id', 'ball_state',
       'ball_owning_team_id', 'ball_x', 'ball_y', 'player_id', 'x', 'y',
       'jersey_no', 'first_name', 'last_name', 'name', 'team_id', 'position',
       'match_time_s', 'match_time_td', 'in_possession', 'ball_zone_x',
       'ball_zone_y', 'ball_zone_label', 'index', 'match_id',
       'team_in_possession_id', 'team_in_possession_phase_type',
       'team_in_possession_phase_type_id', 'team_out_of_possession_phase_type',
       'team_out_of_possession_phase_type_id', 'team_possession_lead_to_shot',
       'team_possession_lead_to_goal', 'team_possession_loss_in_phase',
       'n_player_possessions_in_phase', 'team_in_possession',
       'team_phase_type'],
      dtype='object')
157424 tracking frames with team phase of play info added


In [None]:
# Inspect the columns available in the phases-of-play table.
# NOTE(review): `phases_of_play` is not assigned in any cell visible here, so
# this presumably relies on leftover kernel state -- confirm it survives
# Restart Kernel -> Run All.
phases_of_play.columns

Index(['index', 'match_id', 'frame_start', 'frame_end', 'time_start',
       'time_end', 'minute_start', 'second_start', 'duration', 'period',
       'attacking_side_id', 'team_in_possession_id', 'attacking_side',
       'team_in_possession_shortname', 'n_player_possessions_in_phase',
       'team_possession_loss_in_phase', 'team_possession_lead_to_goal',
       'team_possession_lead_to_shot', 'team_in_possession_phase_type',
       'team_in_possession_phase_type_id', 'team_out_of_possession_phase_type',
       'team_out_of_possession_phase_type_id', 'x_start', 'y_start',
       'channel_id_start', 'channel_start', 'third_id_start', 'third_start',
       'penalty_area_start', 'x_end', 'y_end', 'channel_id_end', 'channel_end',
       'third_id_end', 'third_end', 'penalty_area_end',
       'team_in_possession_width_start', 'team_in_possession_width_end',
       'team_in_possession_length_start', 'team_in_possession_length_end',
       'team_out_of_possession_width_start',
       'team_ou

In [None]:
def expand_phases_to_frames(phases_df: pd.DataFrame) -> pd.DataFrame:
    """
    Expand each phase of play into one row per frame it covers.

    A phase spans every frame from ``frame_start`` to ``frame_end`` (both
    inclusive). The phase-level columns listed in ``cols_keep`` are repeated
    on each of those frames.

    Parameters
    ----------
    phases_df : pd.DataFrame
        Phases of play. Must contain every column named in ``cols_keep``;
        ``frame_start`` and ``frame_end`` must be castable to integers.

    Returns
    -------
    pd.DataFrame
        One row per ``frame_id``, sorted by ``frame_id`` with a fresh
        0..n-1 index. Columns are the kept phase columns (without
        ``frame_start`` / ``frame_end``) followed by ``frame_id``. When there
        is nothing to expand, an empty frame with those columns is returned.

    Raises
    ------
    KeyError
        If a required column is missing from ``phases_df``.
    """
    # Phase-level columns that are carried down to frame level.
    cols_keep = [
        "index",
        "match_id",
        "frame_start",
        "frame_end",
        "team_in_possession_id",
        "team_in_possession_phase_type",
        "team_in_possession_phase_type_id",
        "team_out_of_possession_phase_type",
        "team_out_of_possession_phase_type_id",
        "team_possession_lead_to_shot",
        "team_possession_lead_to_goal",
        "team_possession_loss_in_phase",
        "n_player_possessions_in_phase",
    ]
    frame_bounds = ["frame_start", "frame_end"]

    phases = phases_df[cols_keep]

    # The pandas cast fails loudly on missing bounds instead of producing
    # meaningless frame ids.
    starts = phases["frame_start"].astype("int64").to_numpy()
    ends = phases["frame_end"].astype("int64").to_numpy()

    # Inclusive span; a phase whose end precedes its start yields no frames.
    n_frames = np.maximum(ends - starts + 1, 0)

    # Positional index of the source phase for every expanded row.
    phase_pos = np.repeat(np.arange(len(phases)), n_frames)

    # frame_id = start of the phase + offset of the row inside that phase.
    first_row_of_phase = np.cumsum(n_frames) - n_frames
    offset_in_phase = np.arange(n_frames.sum()) - np.repeat(first_row_of_phase, n_frames)
    frame_ids = np.repeat(starts, n_frames) + offset_in_phase

    phases_per_frame = (
        phases.iloc[phase_pos]
        .drop(columns=frame_bounds)
        .assign(frame_id=frame_ids)
        # Overlapping phases should not happen; if they do, the phase that
        # appears first in phases_df wins for the shared frame.
        .drop_duplicates(subset=["frame_id"])
        .sort_values("frame_id")
        .reset_index(drop=True)
    )

    return phases_per_frame


In [None]:
def merge_phases_into_tracking(
    tracking_df: pd.DataFrame,
    phases_per_frame: pd.DataFrame,
) -> pd.DataFrame:
    """
    Attach phase-of-play columns to the tracking data, frame by frame.

    Performs a left join on ``frame_id``: every tracking row is kept, and
    rows whose frame has no match in ``phases_per_frame`` get missing values
    in the added columns. The join is validated as many-to-one, so pandas
    raises if ``phases_per_frame`` holds more than one row for a frame.
    """
    join_options = {
        "on": "frame_id",
        "how": "left",
        # several tracking rows per frame -> a single row in phases_per_frame
        "validate": "many_to_one",
    }
    return pd.merge(tracking_df, phases_per_frame, **join_options)

In [None]:
def add_team_phase_of_play_info(
    df: pd.DataFrame,
    my_team_id: int,
    col_name: str = "team_phase_type",
) -> pd.DataFrame:
    """
    Add the phase of play seen from one team's point of view.

    Works on a copy of ``df`` and adds two columns:
    - ``team_in_possession``: True where ``team_in_possession_id`` equals
      ``my_team_id``.
    - ``col_name``: ``team_in_possession_phase_type`` on rows where the team
      is in possession, ``team_out_of_possession_phase_type`` otherwise.
    """
    # Is my team the one in possession on each row?
    has_ball = df["team_in_possession_id"] == my_team_id

    # assign() returns a new frame, so the caller's df is left untouched.
    result = df.assign(team_in_possession=has_ball)

    phase_with_ball = result["team_in_possession_phase_type"]
    phase_without_ball = result["team_out_of_possession_phase_type"]
    result[col_name] = np.where(has_ball, phase_with_ball, phase_without_ball)

    return result


In [None]:
# Inspect which columns the tracking frame currently holds.
tracking_df.columns

Index(['frame_id', 'timestamp', 'period_id', 'ball_state',
       'ball_owning_team_id', 'ball_x', 'ball_y', 'player_id', 'x', 'y',
       'jersey_no', 'first_name', 'last_name', 'name', 'team_id', 'position',
       'index', 'match_id', 'team_in_possession_id',
       'team_in_possession_phase_type', 'team_in_possession_phase_type_id',
       'team_out_of_possession_phase_type',
       'team_out_of_possession_phase_type_id', 'team_possession_lead_to_shot',
       'team_possession_lead_to_goal', 'team_possession_loss_in_phase',
       'n_player_possessions_in_phase', 'team_in_possession',
       'team_phase_type', 'match_time_s', 'match_time_td', 'in_possession',
       'ball_zone_x', 'ball_zone_y', 'ball_zone_label'],
      dtype='object')