In [1]:
# Make table of general_metrics with metric and description in two columns
import pandas as pd
from get_position_specific_metrics_statsbomb import get_player_metrics
import ast

'''
Need to implement for single or multiple position groups: actually,
 just have one other column which has the information that is currently
 in the captions. Still, it should handle multiple position groups
 for every season, with the same position group.
'''

# Load in legacy and updated subscription data.
# NOTE(review): both directories are absolute paths on one user's machine, so this
# cell only runs where that OneDrive library is synced at the same location.
base_path = '/Users/metinyarici/Library/CloudStorage/OneDrive-SharedLibraries-LincolnCityFC/Player Recruitment - Data Science/statsbomb_things/'
player_season_stats_file = base_path + 'data/player_season_stats.csv'
df = pd.read_csv(player_season_stats_file)

# Cross platform information: three lookup tables read from the same directory,
# in the order season -> position -> metric grouping.
cross_platform_path = '/Users/metinyarici/Library/CloudStorage/OneDrive-SharedLibraries-LincolnCityFC/Player Recruitment - Data Science/cross_platform/'
season_information, position_information, metric_grouping_information = (
    pd.read_csv(cross_platform_path + lookup_file)
    for lookup_file in (
        'season_information.csv',
        'position_information.csv',
        'statsbomb_metric_groups.csv',
    )
)

# Convert the comma-separated CSV cells into Python lists.
# Splitting directly (rather than wrapping each item in quotes and round-tripping
# through ast.literal_eval) cannot fail on items that contain an apostrophe or a
# backslash, and always produces a list regardless of how many items the cell holds.
# Surrounding whitespace is left on the items here; process_positions strips it.
for _list_column in ('positions_statsbomb', 'statsbomb_metrics'):
    metric_grouping_information[_list_column] = metric_grouping_information[_list_column].apply(
        lambda cell: cell.split(',')
    )

def process_positions(value):
    """Return a list of whitespace-stripped strings.

    Args:
        value: Either a single string or an iterable of strings.

    Returns:
        A one-element list when ``value`` is a string, otherwise a list with
        one stripped entry per item of ``value``.
    """
    # A lone string is treated as a single item rather than iterated per character.
    entries = (value,) if isinstance(value, str) else value
    return [entry.strip() for entry in entries]

# Run process_positions over both list-valued columns so every cell ends up as a
# list of whitespace-stripped strings.
for _list_column in ('positions_statsbomb', 'statsbomb_metrics'):
    _raw_cells = metric_grouping_information[_list_column]
    metric_grouping_information[_list_column] = _raw_cells.apply(process_positions)

# Season information
# NOTE(review): the name implies the CSV rows are already in chronological order;
# nothing here sorts them - confirm against season_information.csv.
chronological_season_ids = season_information['statsbomb_season_id']


# ---------------------------------------------------------------------------
# Pre-processing
# ---------------------------------------------------------------------------

# Clean column names: trim, replace spaces with underscores, lowercase
df.columns = df.columns.str.strip().str.replace(' ', '_').str.lower()

# Drop the 'player_season_' fragment from every column name that contains it
df = df.rename(columns=lambda column_name: column_name.replace('player_season_', ''))

# Normalise the free-text columns the same way: trimmed, lowercase, with
# underscores in place of spaces
for text_column in ('primary_position', 'competition_name', 'player_name'):
    df[text_column] = df[text_column].str.strip().str.lower().str.replace(' ', '_')

# Create the goals-minus-xG feature.
# Presumably 'npga_90' is non-penalty goals plus assists per 90, so removing
# assists leaves non-penalty goals - TODO confirm against the StatsBomb glossary.
non_penalty_goals_90 = df['npga_90'] - df['assists_90']
df['np_goals_less_xg_90'] = non_penalty_goals_90 - df['np_xg_90']

# When several rows share the same player, competition and season, keep only the
# one with the most minutes: sort minutes descending within each key, keep the
# first row per key, then rebuild a clean 0..n-1 index.
dedupe_keys = ['player_id', 'competition_id', 'season_id']
df = (
    df.sort_values(dedupe_keys + ['minutes'], ascending=[True, True, True, False])
    .drop_duplicates(subset=dedupe_keys, keep='first')
    .reset_index(drop=True)
)

# ---------------------------------------------------------------------------
# Search for a particular player and print one LaTeX metric/description table
# for each distinct position group found in that player's rows
# ---------------------------------------------------------------------------

particular_id = 31663 #Baccay

# Find all data for this player
player_df = df[df['player_id'] == particular_id]
if player_df.empty:
    # Fail with an explicit message instead of an opaque IndexError from .iloc[0]
    raise ValueError(f"No rows found in df for player_id {particular_id}")
particular_player = player_df['player_name'].iloc[0]

# Same file as before, but located through the shared cross-platform directory
# instead of repeating the absolute path.
descriptions = pd.read_csv(cross_platform_path+'metric_descriptions.csv')

# Metric -> description lookup, built once rather than filtering the frame for
# every metric. keep='first' mirrors the previous `.iloc[0]` choice when a metric
# is listed more than once. '&', '%' and '#' are escaped (unless already preceded
# by a backslash) because each of them would otherwise break the tabular row.
# `descriptions` itself is left unmodified.
description_by_metric = (
    descriptions.drop_duplicates(subset='metric', keep='first')
    .set_index('metric')['description']
    .astype(str)
    .str.replace(r'(?<!\\)([&%#])', r'\\\1', regex=True)
)

position_group_current = []
for _, row in player_df.iterrows():
    # get_player_metrics is a project helper; only its first two return values are used here.
    position_group, general_metrics, comparable_positions = get_player_metrics(metric_grouping_information, row)
    if position_group in position_group_current:
        # A table for this position group has already been printed
        continue
    position_group_current.append(position_group)

    # Report every metric lacking a description at once, with a readable message
    undocumented_metrics = [metric for metric in general_metrics if metric not in description_by_metric.index]
    if undocumented_metrics:
        raise KeyError(f"No description in metric_descriptions.csv for: {undocumented_metrics}")

    # Create caption and label. The first table keeps the original 'tab:metrics'
    # label; later tables get a numeric suffix so LaTeX labels are never duplicated.
    table_number = len(position_group_current)
    label_suffix = '' if table_number == 1 else f'_{table_number}'
    caption = f"\\caption{{Position Group metrics for {row['primary_position'].replace('_', ' ').title()}s}}\n"
    label = f"\\label{{tab:metrics{label_suffix}}}\n"

    # One "metric & description" row per metric, each followed by a horizontal rule
    table_rows = "".join(
        f"{metric.replace('_', ' ')} & {description_by_metric.loc[metric]} \\\\\n\\hline\n"
        for metric in general_metrics
    )

    latex_table = (
        "\\begin{table}[h!]\n\\centering\n"
        + caption + label
        # Table header
        + "\\resizebox{\\textwidth}{!}{%\n\\begin{tabular}{|p{0.4\\textwidth}|p{0.6\\textwidth}|}\n\\hline\n"
        + "Metric & Description \\\\\n\\hline\n"
        + table_rows
        + "\\end{tabular}%\n}\n\\end{table}\n\n"
    )

    # Print or save the LaTeX table
    print(latex_table)

\begin{table}[h!]
\centering
\caption{Position Group metrics for Josef Brian Baccay in Season: 2022, Competition: Eliteserien, while playing as a Left Back}
\label{tab:metrics}
\resizebox{\textwidth}{!}{%
\begin{tabular}{|p{0.4\textwidth}|p{0.6\textwidth}|}
\hline
Metric & Description \\
\hline
padj tackles 90 & count of tackles adjusted for periods under pressure \\
\hline
padj interceptions 90 & count of interceptions adjusted for periods under pressure \\
\hline
dribbled past 90 & count of times an opposition player dribbles past the player \\
\hline
aerial ratio & ratio of successful aerial duels \\
\hline
crossing ratio & ratio of successful crosses \\
\hline
obv dribble carry 90 & on-ball-value of carries: aggregate increase in probability of team scoring a goal as a result of the player's ball-carrying (dribbling) contributions \\
\hline
challenge ratio & ratio of successful challenges (ground and aerial)  \\
\hline
obv defensive action 90 & on-ball-value of defensive actions: a

In [2]:
# Display the metric/description table read from metric_descriptions.csv in the previous cell
descriptions

Unnamed: 0,metric,description
0,np_goals_less_xg_90,non-penalty goals minus expected goals: quanti...
1,xa_90,expected assists
2,aerial_ratio,ratio of successful aerial duels
3,shot_on_target_ratio,ratio of shots on target
4,crossing_ratio,ratio of successful crosses
5,obv_pass_90,on-ball-value of passes: aggregate increase in...
6,obv_dribble_carry_90,on-ball-value of carries: aggregate increase i...
7,challenge_ratio,ratio of successful challenges (ground and aer...
8,obv_defensive_action_90,on-ball-value of defensive actions: aggregate ...
9,padj_tackles_90,count of tackles adjusted for periods under pr...
