In [None]:
%reload_ext autoreload
%autoreload 2

# player

> Fetch player stats for individual games.

In [None]:
#| default_exp player

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas as pd
import typing
from collections import Counter
from math import sqrt

In [None]:
#| export
def fetch_all_players():
    """Load the rugbypy player registry from the remote manifest file.

    Returns the registry as a DataFrame. If reading it fails for any reason,
    a notice is printed and ``None`` is returned implicitly, so callers
    should check the result before using it.
    """
    registry_url = "https://raw.githubusercontent.com/seanyboi/rugbydata/main/data/v3/player/player_registry.parquet"
    try:
        return pd.read_parquet(registry_url, engine="pyarrow")
    except Exception:
        # Best-effort lookup: report the failure rather than raising.
        print("No player manifest exists. Please raise an issue!")

In [None]:
fetch_all_players()

Unnamed: 0,player_id,player_name
0,9f0d99ce,Morgan Morris
1,e31a204b,James Ratti
2,b1d2895a,Max Nagy
3,25bfea05,Daniel Kasende
4,24717f78,Jac Morgan
...,...,...
8439,b4a9513f,Paul Mullen
8440,edf9d7e1,Tom Baraer
8441,da8da81b,Nathan Van de Ven
8442,d5c9a047,Nicolas Gali


In [None]:
#| exporti
def word2vec(word):
    """Describe a string as a bag-of-characters vector.

    Returns a 4-tuple ``(counts, characters, norm, word)``:
    ``counts`` is a ``Counter`` of how often each character occurs,
    ``characters`` is the set of distinct characters, ``norm`` is the
    Euclidean length of the count vector, and ``word`` is the input itself.
    """
    char_counts = Counter(word)

    # Sum of squared occurrence counts; its square root is the vector length.
    squared_total = 0
    for occurrences in char_counts.values():
        squared_total += occurrences * occurrences

    return char_counts, set(char_counts.keys()), sqrt(squared_total), word

In [None]:
#| exporti
def cosine_similarity(vector1, vector2, ndigits):
    """Cosine similarity of two tuples shaped like the output of ``word2vec``.

    Each vector is indexed as ``[0]`` character counts, ``[1]`` character set
    and ``[2]`` vector length. The result is rounded to ``ndigits`` decimal
    places; if either length is zero (e.g. an empty word) the result is 0.
    """
    # Only characters present in both words contribute to the dot product.
    shared_characters = vector1[1].intersection(vector2[1])
    dot_product = 0
    for character in shared_characters:
        dot_product += vector1[0][character] * vector2[0][character]

    norm_product = vector1[2] * vector2[2]
    if norm_product == 0:
        # Avoid dividing by zero when one of the words is empty.
        return 0
    return round(dot_product / norm_product, ndigits)

In [None]:
#| export
def fetch_player_id(name: str, # the name of the rugby player you wish to fetch the id for.
                threshold: float = 0.75, # the minimum similarity score (between 0 and 1) a player name must reach to be returned.
                top_n: int = 3 # the maximum number of closest matches to return.
                ):
    """Find the registry rows of the players whose names best match `name`.

    Every name in the player registry is compared with `name` using a
    character-count cosine similarity (rounded to 3 decimal places). The
    comparison ignores letter case, so "jac morgan" matches "Jac Morgan"
    exactly. Because only character counts are compared, names made of the
    same letters in a different order also score 1.0.

    Returns:
        A DataFrame with the same columns as the player registry holding at
        most `top_n` rows whose similarity is at least `threshold`, ordered
        from most to least similar. Returns None (after printing a notice)
        when the registry could not be loaded or no name is similar enough.
    """
    players = fetch_all_players()
    if players is None:
        # fetch_all_players() has already reported that the registry could not be loaded.
        return None

    # Case-fold both sides so the match does not depend on capitalisation.
    query_vector = word2vec(str(name).casefold())
    similarities = [
        cosine_similarity(word2vec(str(player_name).casefold()), query_vector, 3)
        for player_name in players["player_name"]
    ]

    # Score the registry rows themselves so the ranking survives to the returned frame.
    scored = players.assign(_similarity=similarities)
    matches = scored[scored["_similarity"] >= threshold]
    if matches.empty:
        print(f"Apologies, we could not find a match for {name}. Please try again or raise an issue!")
        return None

    # A stable sort keeps registry order among equally similar names.
    best_matches = matches.sort_values(by="_similarity", ascending=False, kind="stable").head(top_n)
    return best_matches.drop(columns="_similarity")


In [None]:
fetch_player_id("jac morgan")

Unnamed: 0,player_id,player_name
4,24717f78,Jac Morgan
3518,2c8b48a5,Francesco Braga
5860,e387e07f,Franco Lamanna


In [None]:
#| export
def fetch_player_stats(player_id: str, # the player id of a particular player.
                       date: typing.Optional[str] = None, # the date of a particular match you wish to fetch the player stats for.
                       match_id: typing.Optional[str] = None, # the match id of a particular match you wish to fetch the player stats for.
                       competition_id: typing.Optional[str] = None # the competition you wish to fetch the player stats for.
                       ) -> pd.DataFrame:
    """
    Load one player's statistics, optionally narrowed by a single filter.

    The filters are checked in the order date, match_id, competition_id and
    only the first one supplied is applied; any others are ignored. With no
    filter, every available row for the player is returned.

    Args:
        player_id (str): The unique identifier for the player
        date (typing.Optional[str]): Keep only rows whose game_date equals this value, in YYYYMMDD format (default: None)
        match_id (typing.Optional[str]): Keep only rows for this match identifier (default: None)
        competition_id (typing.Optional[str]): Keep only rows for this competition identifier (default: None)

    Returns:
        pd.DataFrame: The player's statistics, filtered when a filter was given.
                     If loading or filtering fails, a notice is printed and
                     None is returned instead.
    """
    try:
        stats_url = f"https://raw.githubusercontent.com/seanyboi/rugbydata/main/data/v3/player/{player_id}.parquet"
        all_stats = pd.read_parquet(stats_url, engine="pyarrow")

        # Return from the first filter that was supplied.
        if date:
            print(f"Fetching player stats for player_id: {player_id} on date: {date}...")
            return all_stats.query('game_date == @date')
        if match_id:
            print(f"Fetching player stats for player_id: {player_id} in match_id: {match_id}...")
            return all_stats.query('match_id == @match_id')
        if competition_id:
            print(f"Fetching player stats for player_id: {player_id} in competition_id: {competition_id}...")
            return all_stats.query('competition_id == @competition_id')

        print(f"Fetching all player stats for player_id: {player_id}...")
        return all_stats
    except Exception:
        # Best-effort lookup: any failure above is reported, not raised.
        print(
            f"No player stats for {player_id} because the player id does not exist. Please raise an issue!"
        )

In [None]:
fetch_player_stats(player_id="24717f78")

Fetching all player stats for player_id: 24717f78...


Unnamed: 0,player_id,name,team,team_id,position,carries,line_breaks,tackles_completed,turnovers_lost,turnovers_won,...,offload,penalty_goals,points,rucks_won,runs,tackles,total_free_kicks_conceded,total_lineouts,turnover_knock_on,turnovers_conceded
0,24717f78,Jac Morgan,Ospreys,5fa975a5,blindside_flanker,9.0,0.0,19.0,0.0,0.0,...,,,,,,,,,,
1,24717f78,Jac Morgan,Ospreys,5fa975a5,blindside_flanker,11.0,0.0,12.0,1.0,1.0,...,,,,,,,,,,
2,24717f78,Jac Morgan,Ospreys,5fa975a5,openside_flanker,10.0,0.0,30.0,0.0,2.0,...,,,,,,,,,,
3,24717f78,Jac Morgan,Wales,b6319444,flanker,15.0,1.0,,,,...,1.0,0.0,0.0,12.0,15.0,21.0,0.0,0.0,2.0,3.0
4,24717f78,Jac Morgan,Wales,b6319444,flanker,8.0,0.0,,,,...,0.0,0.0,0.0,7.0,8.0,13.0,0.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,24717f78,Jac Morgan,Wales,b6319444,openside_flanker,14.0,0.0,27.0,0.0,1.0,...,,,,,,,,,,
68,24717f78,Jac Morgan,Wales,b6319444,openside_flanker,11.0,0.0,28.0,0.0,1.0,...,,,,,,,,,,
69,24717f78,Jac Morgan,Ospreys,5fa975a5,number_eight,13.0,1.0,12.0,0.0,3.0,...,,,,,,,,,,
70,24717f78,Jac Morgan,Ospreys,5fa975a5,openside_flanker,9.0,0.0,11.0,0.0,2.0,...,,,,,,,,,,


In [None]:
fetch_player_stats(player_id="24717f78", date="20250101") # works the same with fetch_player_stats(player_id="24717f78", match_id="some_match_id")

Fetching player stats for player_id: 24717f78 on date: 20250101...


Unnamed: 0,player_id,name,team,team_id,position,carries,line_breaks,tackles_completed,turnovers_lost,turnovers_won,...,offload,penalty_goals,points,rucks_won,runs,tackles,total_free_kicks_conceded,total_lineouts,turnover_knock_on,turnovers_conceded
0,24717f78,Jac Morgan,Ospreys,5fa975a5,blindside_flanker,9.0,0.0,19.0,0.0,0.0,...,,,,,,,,,,


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()