In [10]:
import pandas as pd

# Read the combined events CSV from the interim data folder into a DataFrame
df = pd.read_csv(filepath_or_buffer="../data/interim/events_combined.csv")


In [11]:
# Show which columns mention "player" (case-insensitive) to see what
# player identifiers the events table offers
has_player_in_name = df.columns.str.contains('player', case=False)
print(df.columns[has_player_in_name])


Index(['player_name'], dtype='object')


In [13]:
# Imported here so this cell runs on a fresh kernel (Path is used below).
from pathlib import Path

# The events table only exposes 'player_name' (no player id column), so an
# integer index is derived from the name. Missing names are pooled under a
# single "Unknown" label so they still receive a valid (non-negative) code.
df['player_name'] = df['player_name'].fillna("Unknown")

# Category codes are assigned from the distinct names present in this
# DataFrame, so the mapping changes whenever the set of names changes.
# NOTE(review): presumably the model was fitted with this same encoding --
# confirm against the notebook that produced the trace.
df['player_idx'] = df['player_name'].astype('category').cat.codes

# Create lookup table: one row per distinct (player_name, player_idx) pair
player_index_lookup = df[['player_name', 'player_idx']].drop_duplicates()

# Create the folder if it doesn't exist
Path("../data/processed").mkdir(parents=True, exist_ok=True)
# Save it
player_index_lookup.to_csv("../data/processed/player_index_lookup.csv", index=False)


In [14]:
import arviz as az

# Read the saved inference data for the player model from disk
idata = az.from_netcdf("../data/interim/player_model_trace.nc")

# Average 'player_effect' over all chains and draws, then take the raw array
player_effect_posterior = idata.posterior['player_effect']
player_effect_means = player_effect_posterior.mean(dim=["chain", "draw"]).values


In [16]:
# Imports live at the top of the cell so it runs on a fresh kernel; the
# original used np without importing it and imported Path after the code.
import warnings
from pathlib import Path

import numpy as np

# Create a new DataFrame with player_idx and impact. The position of each
# posterior mean is used as its player_idx.
# NOTE(review): this assumes the model's player dimension uses the same
# integer codes as player_index_lookup -- confirm against the model notebook.
impact_df = pd.DataFrame({
    'player_idx': np.arange(len(player_effect_means)),
    'player_xg_impact': player_effect_means
})

# Merge using 'player_idx' to align correctly. validate= makes pandas raise
# if either side has duplicate keys instead of silently multiplying rows.
player_impact_df = pd.merge(
    player_index_lookup,
    impact_df,
    on='player_idx',
    how='inner',
    validate='one_to_one',
)

# An inner merge drops unmatched indices without complaint; surface that,
# since a size mismatch suggests the lookup and the trace are out of sync.
if not (len(player_impact_df) == len(player_index_lookup) == len(impact_df)):
    warnings.warn(
        "player_idx mismatch: "
        f"{len(player_index_lookup)} players in lookup, "
        f"{len(impact_df)} player effects in posterior, "
        f"{len(player_impact_df)} rows after merge"
    )

# Save to processed folder
Path("../data/processed").mkdir(parents=True, exist_ok=True)
player_impact_df.to_csv("../data/processed/player_impact.csv", index=False)