# Calculate playing time by tournament

Load the available data with the same code as in the "Loading_Data" notebook.

In [4]:
from mplsoccer import Sbopen
import pandas as pd
import numpy as np

# Loader object used for every data request in this notebook.
parser = Sbopen()
df_competition = parser.competition()

# Keep only the selected competitions whose 'match_available_360' value is set.
wanted_competitions = ["1. Bundesliga", "FIFA World Cup", "UEFA Euro"]
has_360_data = df_competition['match_available_360'].notna()
is_wanted_competition = df_competition["competition_name"].isin(wanted_competitions)
competitions360 = df_competition[has_360_data & is_wanted_competition]

overview_columns = ['competition_name', 'season_name', 'competition_id', 'season_id']
print(competitions360[overview_columns])

   competition_name season_name  competition_id  season_id
0     1. Bundesliga   2023/2024               9        281
29   FIFA World Cup        2022              43        106
68        UEFA Euro        2024              55        282
69        UEFA Euro        2020              55         43


In [6]:
# Fetch the match table of every selected competition/season pair ...
matches_360_list = [
    parser.match(competition_id=comp_id, season_id=season_id)
    for comp_id, season_id in zip(
        competitions360['competition_id'], competitions360['season_id']
    )
]

# ... and stack them into a single frame with a fresh 0..n-1 index.
all_matches_360 = pd.concat(matches_360_list, ignore_index=True)

print(len(all_matches_360))



200


Now calculate the exact minutes each player played per season/tournament, based on the starting line-up and substitutions.

In [7]:
# Compute, for every match, the minutes each appearing player was on the
# pitch (from the Starting XI and Substitution events), then sum those
# minutes per competition / season / player.
#
# NOTE(review): minutes are derived from the `minute` column alone, so the
# result is only as precise as that column — confirm how stoppage time is
# encoded in it before treating the totals as exact.
# NOTE(review): sendings-off are not considered here; a dismissed player is
# credited until the last recorded minute of the match.
minutes_played_list = []

# Walk the match table once so competition and season travel with each
# match_id, instead of re-filtering all_matches_360 on every iteration.
match_info = zip(
    all_matches_360['match_id'],
    all_matches_360['competition_name'],
    all_matches_360['season_name'],
)

for match_id, competition_name, season_name in match_info:
    # Load event and lineup data
    events, related, freeze, tactics = parser.event(match_id)
    lineup = parser.lineup(match_id)

    # Last recorded minute of the match; fall back to 90 if there is none
    match_max_minute = events['minute'].max()
    if pd.isna(match_max_minute):
        match_max_minute = 90

    # Substitution events carry both sides of a change: `player_id` is the
    # player leaving, `substitution_replacement_id` the one coming on.
    substitutions = events.loc[events.type_name == 'Substitution']
    time_off = substitutions[['player_id', 'minute']].rename(columns={'minute': 'off'})
    time_on = substitutions[['substitution_replacement_id', 'minute']].rename(
        columns={'substitution_replacement_id': 'player_id', 'minute': 'on'}
    )

    # Left merges keep every listed player; `on` / `off` stay NaN for
    # players without a matching substitution event.
    lineup = lineup.merge(time_on, on='player_id', how='left')
    lineup = lineup.merge(time_off, on='player_id', how='left')

    # Starters are the players in the tactics rows linked to 'Starting XI' events
    starting_ids = events[events.type_name == 'Starting XI'].id
    starting_xi = tactics[tactics.id.isin(starting_ids)]
    starting_players = starting_xi.player_id

    # Keep only players who actually appeared (started or were substituted)
    mask_played = (
        lineup.on.notnull()
        | lineup.off.notnull()
        | lineup.player_id.isin(starting_players)
    )
    lineup = lineup[mask_played].copy()

    # Starters: until their `off` minute, else until the end of the match.
    # Non-starters: from their `on` minute to the end of the match, else 0.
    lineup['minutes_played'] = np.where(
        lineup['player_id'].isin(starting_players),
        np.where(lineup['off'].notnull(), lineup['off'], match_max_minute),
        np.where(lineup['on'].notnull(), match_max_minute - lineup['on'], 0)
    )

    # Players who came on and later went off again only get the span between
    lineup['minutes_played'] = np.where(
        lineup['on'].notnull() & lineup['off'].notnull(),
        lineup['off'] - lineup['on'],
        lineup['minutes_played']
    )

    # Add match, competition, season info
    lineup['match_id'] = match_id
    lineup['competition_name'] = competition_name
    lineup['season_name'] = season_name

    # Save necessary columns
    minutes_played_list.append(
        lineup[['match_id', 'player_id', 'player_name', 'player_nickname', 'team_name',
                'minutes_played', 'competition_name', 'season_name']]
    )

# Combine all matches
all_minutes_played = pd.concat(minutes_played_list, ignore_index=True)

# Group by competition, season, and player.
# dropna=False: groupby drops rows with a missing key by default, which would
# silently remove a player whose nickname (or any other key) is missing.
player_minutes = all_minutes_played.groupby(
    ['competition_name', 'season_name', 'player_id', 'player_name', 'player_nickname', 'team_name'],
    as_index=False,
    dropna=False,
)['minutes_played'].sum()

# See result
print(player_minutes.sort_values(['competition_name', 'season_name', 'minutes_played'], ascending=[True, True, False]))

     competition_name season_name  player_id                 player_name     player_nickname         team_name  minutes_played
86      1. Bundesliga   2023/2024       8667              Lukáš Hrádecký      Lukáš Hrádecký  Bayer Leverkusen          3110.0
7       1. Bundesliga   2023/2024       3500                Granit Xhaka        Granit Xhaka  Bayer Leverkusen          2934.0
141     1. Bundesliga   2023/2024      10336   Alejandro Grimaldo García       Álex Grimaldo  Bayer Leverkusen          2899.0
47      1. Bundesliga   2023/2024       8221                Jonathan Tah        Jonathan Tah  Bayer Leverkusen          2748.0
294     1. Bundesliga   2023/2024      40724               Florian Wirtz       Florian Wirtz  Bayer Leverkusen          2442.0
...               ...         ...        ...                         ...                 ...               ...             ...
1987        UEFA Euro        2024      49445  Michael Ijemuan Folorunsho  Michael Folorunsho             Italy 