In [10]:
import sys, os
from pathlib import Path
# Put the project root first on sys.path, presumably so the `src` package
# imported right after this resolves. ROOT is the grandparent of the current
# working directory.
# NOTE(review): this depends on where the kernel was started — it assumes the
# working directory sits two levels below the repository root; confirm.
ROOT = Path.cwd().parents[1]
sys.path.insert(0, str(ROOT))

from src import preprocessing as pre
from src import phases


from kloppy import skillcorner
from mplsoccer import Pitch, VerticalPitch
from pathlib import Path
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap
import re
import numpy as np


In [11]:
# Match under analysis; its metadata is loaded through the project's
# preprocessing module (`pre`).
match_id = 1886347
match_metadata = pre.load_metadata(match_id)

In [12]:
# Team of interest: the home side's id, read from the match metadata.
team_id = match_metadata['home_team']['id']

In [13]:
# Load the phases-of-play table for this match.
df_phases = pre.load_phases_of_play(match_id)

In [14]:
# List the columns available in the phases-of-play table.
df_phases.columns

Index(['index', 'match_id', 'frame_start', 'frame_end', 'time_start',
       'time_end', 'minute_start', 'second_start', 'duration', 'period',
       'attacking_side_id', 'team_in_possession_id', 'attacking_side',
       'team_in_possession_shortname', 'n_player_possessions_in_phase',
       'team_possession_loss_in_phase', 'team_possession_lead_to_goal',
       'team_possession_lead_to_shot', 'team_in_possession_phase_type',
       'team_in_possession_phase_type_id', 'team_out_of_possession_phase_type',
       'team_out_of_possession_phase_type_id', 'x_start', 'y_start',
       'channel_id_start', 'channel_start', 'third_id_start', 'third_start',
       'penalty_area_start', 'x_end', 'y_end', 'channel_id_end', 'channel_end',
       'third_id_end', 'third_end', 'penalty_area_end',
       'team_in_possession_width_start', 'team_in_possession_width_end',
       'team_in_possession_length_start', 'team_in_possession_length_end',
       'team_out_of_possession_width_start',
       'team_ou

In [15]:
# Count the in-possession phase types over the phases in which our team has
# the ball.
df_phases.loc[
    df_phases["team_in_possession_id"] == team_id,
    "team_in_possession_phase_type",
].value_counts()


team_in_possession_phase_type
create         79
finish         61
chaotic        49
build_up       24
set_play       16
direct         11
transition      3
quick_break     3
Name: count, dtype: int64

In [16]:
# Count the out-of-possession phase types over the phases in which our team
# is NOT the team in possession.
df_phases.loc[
    df_phases["team_in_possession_id"] != team_id,
    "team_out_of_possession_phase_type",
].value_counts()

team_out_of_possession_phase_type
medium_block          77
chaotic               48
high_block            39
low_block             28
defending_direct      12
defending_set_play     4
Name: count, dtype: int64

In [17]:
def add_team_phase_of_play_info(
    df: pd.DataFrame,
    my_team_id: int,
) -> pd.DataFrame:
    """
    Re-express phase-of-play and possession context from one team's viewpoint.

    The input columns describe each row relative to whichever team holds the
    ball. This function works on a copy of `df` and appends columns that are
    instead relative to `my_team_id`, choosing between the in-possession and
    out-of-possession variants row by row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing the columns read below:
        - `team_in_possession_id`
        - `team_in_possession_phase_type`, `team_out_of_possession_phase_type`
        - `possession_third_start`, `possession_third_end`
        - `possession_channel_start`, `possession_channel_end`
        - `possession_penalty_area_start`, `possession_penalty_area_end`
        - `team_possession_loss_in_phase`
    my_team_id : int
        Identifier compared against `team_in_possession_id`.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with these columns added (in this order):
        - `team_in_possession`: True where `team_in_possession_id` equals
          `my_team_id`.
        - `team_phase_type` / `opponent_phase_type`: the in-possession phase
          type for whichever side has the ball and the out-of-possession
          phase type for the other side.
        - `possession_start_team_third`, `possession_end_team_third`,
          `possession_start_team_channel`, `possession_end_team_channel`:
          the raw value when my team is in possession, otherwise the value
          mapped through `invert_third` / `invert_channel`.
        - `team_loss_in_possession`: my team in possession AND
          `team_possession_loss_in_phase`.
        - `team_recovery_in_possession`: my team NOT in possession AND
          `team_possession_loss_in_phase`.
        - `possession_ends_in_opponent_box`, `possession_starts_in_opponent_box`:
          my team in possession AND the matching penalty-area flag.
        - `possession_ends_in_team_box`, `possession_starts_in_team_box`:
          my team NOT in possession AND the matching penalty-area flag.

    Notes
    -----
    - `invert_third` and `invert_channel` are not defined in this function;
      they must be resolvable from the enclosing scope when it is called.
    - Rows whose `team_in_possession_id` does not equal `my_team_id`
      (including missing ids) are treated as "not in possession".
    """
    out = df.copy()

    out["team_in_possession"] = out["team_in_possession_id"] == my_team_id
    has_ball = out["team_in_possession"]

    # Phase labels: pick the in/out-of-possession variant per row.
    phase_with_ball = out["team_in_possession_phase_type"]
    phase_without_ball = out["team_out_of_possession_phase_type"]
    out["team_phase_type"] = np.where(has_ball, phase_with_ball, phase_without_ball)
    out["opponent_phase_type"] = np.where(has_ball, phase_without_ball, phase_with_ball)

    # Start/end thirds: raw when we have the ball, inverted otherwise.
    third_start = out["possession_third_start"]
    out["possession_start_team_third"] = np.where(
        has_ball, third_start, third_start.map(invert_third)
    )
    third_end = out["possession_third_end"]
    out["possession_end_team_third"] = np.where(
        has_ball, third_end, third_end.map(invert_third)
    )

    # Start/end channels: same rule, using the channel inverter.
    channel_start = out["possession_channel_start"]
    out["possession_start_team_channel"] = np.where(
        has_ball, channel_start, channel_start.map(invert_channel)
    )
    channel_end = out["possession_channel_end"]
    out["possession_end_team_channel"] = np.where(
        has_ball, channel_end, channel_end.map(invert_channel)
    )

    # Boolean context flags, split by which side holds the ball.
    without_ball = ~has_ball

    ball_lost = out["team_possession_loss_in_phase"]
    out["team_loss_in_possession"] = has_ball & ball_lost
    out["team_recovery_in_possession"] = without_ball & ball_lost

    box_at_end = out["possession_penalty_area_end"]
    out["possession_ends_in_opponent_box"] = has_ball & box_at_end
    out["possession_ends_in_team_box"] = without_ball & box_at_end

    box_at_start = out["possession_penalty_area_start"]
    out["possession_starts_in_opponent_box"] = has_ball & box_at_start
    out["possession_starts_in_team_box"] = without_ball & box_at_start

    return out

In [18]:
# Tracking data for the selected (home) team; phases of play are not included
# at this step.
tracking_df = pre.prepare_team_tracking(
    match_id,
    team_id,
    is_home_team=True,
    include_phases_of_play=False,
)

  .apply(pick_gk_for_frame)


In [19]:
# Disabled earlier step, kept for reference:
# tracking_df = pre.add_in_possession_column(tracking_df, team_id)
# Presumably adds the phases-of-play columns to the tracking frame.
# NOTE(review): this rebinds `tracking_df` from itself, so re-running the cell
# feeds an already-augmented frame back into the helper — confirm the helper
# is idempotent, or assign the result to a new name.
tracking_df = pre.add_phases_of_play_info(tracking_df, match_id, team_id)

frames duplicados: 0


In [20]:
# Split the tracking rows by whether our team is the team in possession.
tracking_df_in = tracking_df.loc[tracking_df["team_in_possession_id"] == team_id]
tracking_df_out = tracking_df.loc[tracking_df["team_in_possession_id"] != team_id]

In [21]:
# Alternative in-possession selection driven by the `in_possession` column.
# The explicit `== True` comparison is kept as written so that the mask is
# built exactly as before, whatever the column's dtype.
tracking_in = tracking_df.loc[tracking_df["in_possession"] == True]  # noqa: E712

In [22]:
# Row counts per team phase type while our team is in possession (id-based filter).
tracking_df_in.team_phase_type.value_counts()

team_phase_type
create         33136
finish         23478
build_up       10897
set_play        7238
chaotic         5940
transition      2159
direct          1849
quick_break     1109
Name: count, dtype: int64

In [23]:
# Same breakdown via the `in_possession` flag, as a cross-check against the
# id-based filter.
tracking_in.team_phase_type.value_counts()

team_phase_type
create         33136
finish         23478
build_up       10897
set_play        7238
chaotic         5940
transition      2159
direct          1849
quick_break     1109
Name: count, dtype: int64

In [29]:
# Row counts per team phase type while our team is NOT the team in possession.
tracking_df_out.team_phase_type.value_counts() 

team_phase_type
medium_block          30502
high_block            16896
low_block              7248
chaotic                7113
defending_set_play     1910
defending_direct       1830
Name: count, dtype: int64

In [26]:
# List the columns currently present on the tracking frame.
tracking_df.columns

Index(['frame_id', 'timestamp', 'period_id', 'ball_state',
       'ball_owning_team_id', 'ball_x', 'ball_y', 'player_id', 'x', 'y',
       'jersey_no', 'first_name', 'last_name', 'name', 'team_id', 'position',
       'match_time_s', 'match_time_td', 'in_possession', 'ball_zone_x',
       'ball_zone_y', 'ball_zone_label', 'possession_id', 'match_id',
       'possession_duration_s', 'possession_channel_start',
       'possession_third_start', 'possession_penalty_area_start',
       'possession_channel_end', 'possession_third_end',
       'possession_penalty_area_end', 'team_in_possession_id',
       'team_in_possession_phase_type', 'team_in_possession_phase_type_id',
       'team_out_of_possession_phase_type',
       'team_out_of_possession_phase_type_id', 'possession_lead_to_shot',
       'possession_lead_to_goal', 'team_possession_loss_in_phase',
       'n_player_possessions_in_phase', 'team_in_possession',
       'team_phase_type', 'opponent_phase_type', 'possession_start_team_third',

In [None]:
def merge_phases_into_tracking(
    tracking_df: pd.DataFrame,
    phases_per_frame: pd.DataFrame,
) -> pd.DataFrame:
    """
    Attach phases-of-play information to tracking data, frame by frame.

    Performs a left join on `frame_id`, so every tracking row is kept and rows
    whose frame has no match in `phases_per_frame` get missing values in the
    added columns.

    Parameters
    ----------
    tracking_df : pandas.DataFrame
        Left table; may hold many rows per `frame_id`.
    phases_per_frame : pandas.DataFrame
        Right table; must hold at most one row per `frame_id`.

    Returns
    -------
    pandas.DataFrame
        The joined frame.

    Raises
    ------
    pandas.errors.MergeError
        If `frame_id` is duplicated in `phases_per_frame` (enforced by
        `validate="many_to_one"`).
    """
    # many tracking rows -> a single row of phases_per_frame
    join_options = {"on": "frame_id", "how": "left", "validate": "many_to_one"}
    return pd.merge(tracking_df, phases_per_frame, **join_options)

In [None]:
def add_team_phase_of_play_info(
    df: pd.DataFrame,
    my_team_id: int,
    col_name: str = "team_phase_type",
) -> pd.DataFrame:
    """
    Add a column holding the phase that applies to my team on each row.

    - Where my team is the team in possession, the value is taken from
      `team_in_possession_phase_type`.
    - Otherwise it is taken from `team_out_of_possession_phase_type`.

    A boolean `team_in_possession` column is added as well. The input frame is
    not modified; a copy is returned.
    """
    out = df.copy()

    # Is my team the one in possession on this row?
    out["team_in_possession"] = out["team_in_possession_id"] == my_team_id
    has_ball = out["team_in_possession"]

    phase_with_ball = out["team_in_possession_phase_type"]
    phase_without_ball = out["team_out_of_possession_phase_type"]
    out[col_name] = np.where(has_ball, phase_with_ball, phase_without_ball)

    return out


In [None]:
# List the columns currently present on the tracking frame.
# NOTE(review): this cell has no execution count and its saved output lists a
# different column set from the earlier `tracking_df.columns` cell, so the
# output appears to be stale — re-run or remove.
tracking_df.columns

Index(['frame_id', 'timestamp', 'period_id', 'ball_state',
       'ball_owning_team_id', 'ball_x', 'ball_y', 'player_id', 'x', 'y',
       'jersey_no', 'first_name', 'last_name', 'name', 'team_id', 'position',
       'index', 'match_id', 'team_in_possession_id',
       'team_in_possession_phase_type', 'team_in_possession_phase_type_id',
       'team_out_of_possession_phase_type',
       'team_out_of_possession_phase_type_id', 'team_possession_lead_to_shot',
       'team_possession_lead_to_goal', 'team_possession_loss_in_phase',
       'n_player_possessions_in_phase', 'team_in_possession',
       'team_phase_type', 'match_time_s', 'match_time_td', 'in_possession',
       'ball_zone_x', 'ball_zone_y', 'ball_zone_label'],
      dtype='object')