In [None]:
%reload_ext autoreload
%autoreload 2

# match

> Fetch match stats for individual games.

In [None]:
#| default_exp match

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas as pd
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from concurrent.futures import TimeoutError as FuturesTimeoutError
from typing import Any, TypeVar

from rugbypy.team import fetch_team_stats
from rugbypy.player import fetch_player_stats

# Type variable for generic return types
T = TypeVar("T")

In [None]:
#| export
def fetch_all_matches():
    """Load the registry of every known match, oldest first.

    The registry is a single parquet file read from the rugbydata GitHub
    repository. Each row describes one match:

    * match_id
    * home_team
    * away_team
    * date

    Returns:
        The registry sorted by ascending ``date``. If reading or sorting
        raises, a message containing the error is printed and ``None`` is
        returned instead.
    """

    try:
        print("Fetching all matches...")
        registry_url = "https://raw.githubusercontent.com/seanyboi/rugbydata/main/data/v3/match/match_registry.parquet"
        registry = pd.read_parquet(registry_url, engine="pyarrow")
        chronological = registry.sort_values(by="date", ascending=True)
    except Exception as e:
        # Best-effort API: report the problem to the user instead of raising.
        print(
            f"No match information either because no matches took place or rugbypy does not have access to the match data - {e}. Please raise an issue if neither."
        )
        return None
    else:
        return chronological

In [None]:
# Example: load the full match registry (reads a parquet file over the network).
fetch_all_matches()

Fetching all matches...


Unnamed: 0,match_id,home_team,away_team,date
1498,cae3129b,Exeter Chiefs,Bristol Rugby,20220101
1499,494f4092,Scarlets,Ospreys,20220101
1500,77e4e537,Connacht,Munster,20220101
1501,fbca6594,Stade Francais Paris,Perpignan,20220101
1502,7863a641,Clermont Auvergne,Stade Toulousain,20220101
...,...,...,...,...
1121,2ac5db4a,Shizuoka Bluerevs,Urayasu D Rocks,20251228
1122,a68f4527,Toshiba Brave Lupus Tokyo,Yokohama Canon Eagles,20251228
1123,00adb234,Mitsubishi Sagamihara Dynaboars,Saitama Wild Knights,20251228
1117,e37207b8,Leicester,Exeter Chiefs,20251228


In [None]:
#| export
def fetch_matches_by_date(date: str):
    """Fetches all match information for matches played on a particular date.

    Args:
        date (str): The match date in the format YYYYMMDD, e.g. "20251205".
            It is interpolated directly into the name of the parquet file
            that is requested.

    Returns:
        The contents of that date's parquet file, one row per match, with the
        columns:

        * match_id
        * competition_id
        * home_team_id
        * home_team
        * away_team_id
        * away_team

        If the file cannot be read, a message is printed and ``None`` is
        returned.
    """

    try:
        print(f"Fetching matches on date: {date}...")
        date_url = f"https://raw.githubusercontent.com/seanyboi/rugbydata/main/data/v3/dates/{date}.parquet"
        matches = pd.read_parquet(date_url, engine="pyarrow")
        return matches
    except Exception as e:
        # Include the underlying error (as fetch_all_matches does) so a missing
        # date file can be told apart from e.g. a network or parquet-engine problem.
        print(
            f"No match information for matches played on {date} either because no matches took place or rugbypy does not have access to the match data - {e}. Please raise an issue if neither."
        )
        return None

In [None]:
# Example: list the matches played on 5 December 2025 (date given as YYYYMMDD).
fetch_matches_by_date(date="20251205")

Fetching matches on date: 20251205...


Unnamed: 0,match_id,competition_id,home_team_id,home_team,away_team_id,away_team
0,35e0b16d,ee0c6883,d7d74d92,Sale Sharks,6f08f859,Glasgow Warriors
1,f29de850,ee0c6883,fe2eda63,Bayonne,f54be954,Stormers
2,722ce3c7,83d92007,0aa2b3cc,Ulster,2e49b214,Racing 92
3,6afe8441,822142db,deee3415,Beziers,af22c862,Soyaux Angouleme
4,719b1db1,822142db,93d1e116,Carcassonne,34149c5c,Vannes
5,3aa1bb81,822142db,98fce2f7,Biarritz,791d6c23,Valence Romans
6,575235eb,822142db,870e9fb0,Aurillac,d5468acc,Brive
7,66a1c770,822142db,6992ab09,Dax,8b15a375,Uso Nevers
8,f196da5a,822142db,2bac4677,Agen,4f43e2ca,Provence Rugby
9,416f317e,822142db,6de7a527,Mont De Marsan,53983719,Oyonnax


In [None]:
#| export
def fetch_match_details(match_id: str):
    """Download the details recorded for a single match.

    Args:
        match_id (str): Identifier of the match; it is interpolated into the
            name of the parquet file that is requested.

    Returns:
        The contents of the match's parquet file, or ``None`` (after printing
        an error message) if reading it raises.
    """
    print(f"Fetching match details for match_id: {match_id}...")
    try:
        match_url = f"https://raw.githubusercontent.com/seanyboi/rugbydata/main/data/v3/match/{match_id}.parquet"
        details = pd.read_parquet(match_url, engine="pyarrow")
    except Exception:
        # Best-effort API: tell the user and hand back None rather than raising.
        print("Error fetching match data. Please raise an issue!")
        return None
    return details

In [None]:
# Example: fetch the details of match 35e0b16d (one of the matches listed for 20251205 above).
fetch_match_details(match_id="35e0b16d")

Fetching match details for match_id: 35e0b16d...


Unnamed: 0,match_id,date,season,competition_id,competition,venue_id,venue,city_played,home_team,away_team,...,completed,is_tournament,played_on_grass,attendance,home_team_form,away_team_form,kickoff_time,home_score,away_score,referee
0,35e0b16d,20251205,2025,ee0c6883,European Rugby Champions Cup,059580a0,CorpAcq Stadium,Salford,Sale Sharks,Glasgow Warriors,...,True,True,True,,WLWLL,WWWWL,14:30,21,26,


New function to fetch all players' stats for a given match

Idea: look up the list of players each team fielded in the match, then fetch every player's stats for that match. The example below uses match ID 416f317e.

In [None]:
#| export

def fetch_match_players_stats(match_id: str, max_workers: int = 1) -> pd.DataFrame:
    """Fetches player statistics for all players who participated in a specific match. \n
    Looks up both teams of the match, reads each team's player list for that match,
    then fetches every player's stats through a thread pool. \n

    Args:
        match_id (str): The unique identifier for the match
        max_workers (int): Number of parallel workers for fetching player stats (default: 1)

    Returns:
        pd.DataFrame: Combined DataFrame with statistics for all players in the match
                     (home team's players first, each team in roster order),
                     or empty DataFrame if no data available. Players whose stats
                     could not be fetched are reported and left out.

    Raises:
        ValueError: If max_workers is not a positive number (raised by ThreadPoolExecutor).
    """

    def get_list_of_players(team_id: str, match_id: str) -> list:
        """Extracts the list of unique players for a team in a specific match.

        Args:
            team_id (str): The unique identifier for the team
            match_id (str): The unique identifier for the match

        Returns:
            list: Unique player IDs in roster order, or empty list if no data found
        """
        team_stats = fetch_team_stats(team_id=team_id)
        try:
            roster = team_stats[team_stats['match_id'] == match_id]['players'].iloc[0]
            # dict.fromkeys removes duplicates but, unlike set(), keeps the roster
            # order, so the output no longer changes from run to run.
            return list(dict.fromkeys(roster))
        except Exception as _:
            # Covers a missing team frame, no row for this match, or an unusable roster.
            print("No player data found for this team in the match.")
            return []

    match_details = fetch_match_details(match_id)

    # fetch_match_details prints an error and returns None when the download
    # fails, so None has to be treated the same way as an empty frame.
    if match_details is None or match_details.empty:
        print("No match details available.")
        return pd.DataFrame()

    home_team_players = get_list_of_players(
        team_id=match_details['home_team_id'].iloc[0],
        match_id=match_id
    )
    away_team_players = get_list_of_players(
        team_id=match_details['away_team_id'].iloc[0],
        match_id=match_id
    )

    # Home players first, then away players; an id is never requested twice.
    player_ids = list(dict.fromkeys(home_team_players + away_team_players))

    stats_by_player: dict[str, pd.DataFrame] = {}

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks: map each future to the player it was submitted for
        future_to_player: dict[Future, Any] = {
            executor.submit(fetch_player_stats, player_id=player_id, match_id=match_id): player_id
            for player_id in player_ids
        }

        # Futures complete in arbitrary order; results are keyed by player so
        # the final frame can be assembled in submission order below.
        for future in as_completed(future_to_player):
            player_id = future_to_player[future]
            try:
                result = future.result()
            except Exception as e:
                print(f"Failed to fetch data for item: {player_id} with error: {e}")
                continue

            if isinstance(result, pd.DataFrame):
                stats_by_player[player_id] = result
                print(f"Successfully fetched data for item: {player_id}")
            else:
                # NOTE(review): presumably fetch_player_stats returns None on failure,
                # like the fetchers in this module - confirm. Either way, only
                # DataFrames can be combined, so do not report this as a success.
                print(f"Failed to fetch data for item: {player_id} with error: no data returned")

    # Combine all player stats DataFrames into a single DataFrame
    player_stats_list = [
        stats_by_player[player_id] for player_id in player_ids if player_id in stats_by_player
    ]
    if not player_stats_list:
        print("No player stats data available for this match.")
        return pd.DataFrame()

    return pd.concat(player_stats_list, ignore_index=True)

        

In [None]:
# Example: fetch the stats of every player in match 416f317e, using 4 worker threads.
fetch_match_players_stats(match_id="416f317e", max_workers=4)

Fetching match details for match_id: 416f317e...
Fetching all team stats for team_id: 6de7a527...
Fetching all team stats for team_id: 53983719...
Fetching player stats for player_id: 768a06a0 in match_id: 416f317e...
Successfully fetched data for item: 768a06a0
Fetching player stats for player_id: 60d9cf47 in match_id: 416f317e...
Fetching player stats for player_id: 291aafa3 in match_id: 416f317e...
Successfully fetched data for item: 60d9cf47
Successfully fetched data for item: 291aafa3
Fetching player stats for player_id: 54751009 in match_id: 416f317e...
Successfully fetched data for item: 54751009
Fetching player stats for player_id: 3c098414 in match_id: 416f317e...
Successfully fetched data for item: 3c098414
Fetching player stats for player_id: 6c96e05a in match_id: 416f317e...
Successfully fetched data for item: 6c96e05a
Fetching player stats for player_id: 36223dfe in match_id: 416f317e...
Successfully fetched data for item: 36223dfe
Fetching player stats for player_id: 5f73

Unnamed: 0,player_id,name,team,team_id,position,carries,line_breaks,tackles_completed,turnovers_lost,turnovers_won,...,offload,penalty_goals,points,rucks_won,runs,tackles,total_free_kicks_conceded,total_lineouts,turnover_knock_on,turnovers_conceded
0,768a06a0,Yannick Lodjro,Mont De Marsan,6de7a527,right_wing,5.0,1.0,6.0,3.0,0.0,...,,,,,,,,,,
1,60d9cf47,Joris Dupont,Mont De Marsan,6de7a527,outside_centre,4.0,0.0,5.0,0.0,0.0,...,,,,,,,,,,
2,291aafa3,Simao Bento,Mont De Marsan,6de7a527,fullback,11.0,0.0,3.0,2.0,0.0,...,,,,,,,,,,
3,54751009,Iban Laclau,Mont De Marsan,6de7a527,fly_half,5.0,0.0,6.0,2.0,0.0,...,,,,,,,,,,
4,3c098414,Christophe Loustalot,Mont De Marsan,6de7a527,forward_sub,1.0,0.0,1.0,0.0,0.0,...,,,,,,,,,,
5,6c96e05a,Raphael Robic,Mont De Marsan,6de7a527,blindside_flanker,5.0,2.0,8.0,0.0,0.0,...,,,,,,,,,,
6,36223dfe,Brent Liufau,Mont De Marsan,6de7a527,front_row_sub,4.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
7,5f7340ce,Gatien Masse,Mont De Marsan,6de7a527,left_wing,5.0,1.0,3.0,1.0,0.0,...,,,,,,,,,,
8,baa7630a,Luka Goginava,Mont De Marsan,6de7a527,front_row_sub,11.0,0.0,4.0,0.0,0.0,...,,,,,,,,,,
9,4ebab3df,Torsten van Jaarsveld,Mont De Marsan,6de7a527,hooker,2.0,0.0,15.0,1.0,0.0,...,,,,,,,,,,


In [None]:
#| export
def fetch_matches(date: str):
    """Deprecated entry point kept so existing callers do not break.

    Only prints a notice pointing to ``fetch_matches_by_date()``. The ``date``
    argument is accepted but unused; nothing is fetched and ``None`` is returned.
    """
    deprecation_notice = "Deprecated, please use the fetch_matches_by_date() function instead"
    print(deprecation_notice)
    return None

In [None]:
#| hide
# Run nbdev's export step; presumably this writes the `#| export` cells above
# to the module named by `#| default_exp match` - confirm against the nbdev docs.
import nbdev 
nbdev.nbdev_export()