In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# 1. LOAD DATA
df = pd.read_csv('players_integration.csv')

# --- PREPROCESSING & HELPER FUNCTIONS ---

def safe_div(a, b):
    return np.divide(a, b, out=np.zeros_like(a, dtype=float), where=b!=0)

# Normalize standard columns to Per 90
df['Mins_Per_90'] = df['Min_Playing.Time'] / 90.0

# ---------------------------------------------------------
# 2. CORE UNIVERSAL COMPOSITES
# ---------------------------------------------------------

# 2.1 Offensive Output Index (OOI)
df['OOI'] = safe_div(
    (df['npxG_Expected_Shoot'] + df['xAG_Expected']),
    df['Mins_Per_90']
)

# 2.2 Defensive Contribution Index (DCI)
df['DCI'] = safe_div(
    (df['Tkl_Tackles'] + df['Int_Def'] + df['Clr'] + df['Blocks_Blocks']),
    df['Mins_Per_90']
)

# 2.3 Ball Progression Index (BPI)
df['BPI'] = safe_div(
    (df['PrgC_Carries'] + df['PrgP_Progression'] + df['PrgR_Progression']),
    df['Mins_Per_90']
)

# 2.4 Shooting Efficiency Index (SEI)
# Formula: (G/SoT * SoT%) + (npxG/Sh * 0.5)
g_per_sot = safe_div(df['Gls_Standard'], df['SoT_Standard'])
npxg_per_sh = safe_div(df['npxG_Expected_Shoot'], df['Sh_Standard'])

df['SEI'] = (g_per_sot * (df['SoT_percent_Standard'] / 100)) + (npxg_per_sh * 0.5)

# 2.5 Passing Effectiveness Index (PEI)
# Formula: (Cmp% * 0.3) + (KP/Att * 100 * 0.4) + (Final_Third/Att * 100 * 0.3)
kp_rate = safe_div(df['KP'], df['Att_Total']) * 100
final_third_rate = safe_div(df['Final_Third'], df['Att_Total']) * 100

df['PEI'] = (
    (df['Cmp_percent_Total'] * 0.3) +
    (kp_rate * 0.4) +
    (final_third_rate * 0.3)
)

# ---------------------------------------------------------
# 3. POSITION-SPECIFIC EFFICIENCY (PSE)
# ---------------------------------------------------------

df['PSE'] = 0.0

# 3.1 Forwards (FW)
# CORRECTION: Used 'npxG_Expected_Shoot'
dribble_success = safe_div(df['Succ_Take'], df['Att_Take'])
pse_fw = npxg_per_sh * dribble_success * 100

# 3.2 Midfielders (MF)
xag_per_kp = safe_div(df['xAG_Expected'], df['KP'])
prgp_rate = safe_div(df['PrgP_Progression'], df['Att_Total'])
pse_mf = xag_per_kp * (df['Cmp_percent_Total'] / 100) * prgp_rate

# 3.3 Defenders (DF)
def_actions = df['Tkl_Tackles'] + df['Int_Def']
def_failures = df['Lost_Challenges'] + df['Err']
pse_df = safe_div(def_actions, (def_actions + def_failures + 1))

# 3.4 Fullbacks
crs_per_90 = safe_div(df['Crs'], df['Mins_Per_90'])
pse_fb = (0.5 * def_actions) + (0.3 * df['PrgC_Carries']) + (0.2 * crs_per_90)

# APPLY PSE LOGIC
mask_fw = df['Pos'].str.contains('FW', na=False) | df['Pos'].str.contains('LW', na=False) | df['Pos'].str.contains('RW', na=False)
mask_mf = df['Pos'].str.contains('MF', na=False)
mask_df = df['Pos'].str.contains('DF', na=False) & ~df['Pos'].str.contains('FW', na=False)

df.loc[mask_fw, 'PSE'] = pse_fw[mask_fw]
df.loc[mask_mf, 'PSE'] = pse_mf[mask_mf]
df.loc[mask_df, 'PSE'] = pse_df[mask_df]

# ---------------------------------------------------------
# 4. RELIABILITY & VALUE METRICS
# ---------------------------------------------------------

# 4.1 Defensive Reliability Index (DRI)
df['DRI'] = pse_df * df['Mins_Per_90']

# 4.2 Aerial Dominance
df['Aerial_Efficiency'] = safe_div(df['Won_Aerial'], (df['Won_Aerial'] + df['Lost_Aerial'])) * 100
df['Aerial_Volume'] = safe_div((df['Won_Aerial'] + df['Lost_Aerial']), df['Mins_Per_90'])

# 4.3 Consistency Score (CS)
consistency_stats = df.groupby('Player')['OOI'].agg(['mean', 'std'])
consistency_stats['CV'] = safe_div(consistency_stats['std'], consistency_stats['mean'])
consistency_stats['CS'] = 1 - consistency_stats['CV']
consistency_stats['CS'] = consistency_stats['CS'].fillna(0.5)
df['CS'] = df['Player'].map(consistency_stats['CS'])

# 4.4 Age-Adjusted Performance (AAP)
total_contribution = df['OOI'] + df['DCI']

def get_age_multiplier(age):
    if 18 <= age <= 21: return 1.2
    elif 22 <= age <= 27: return 1.0
    elif 28 <= age <= 32: return 0.85
    else: return 0.7

df['Age_Multiplier'] = df['Age'].apply(get_age_multiplier)
df['AAP'] = total_contribution * df['Age_Multiplier']

# 4.5 Tactical Versatility Index (TVI)
df['TVI'] = df['Positions'].apply(lambda x: len(str(x).split(',')) / 11.0 * 100)

# 4.6 Market Value Efficiency (MVE)
# Placeholder for missing Market Value column
if 'Market_Value' not in df.columns:
    df['Market_Value'] = 10.0

df['MVE'] = (df['OOI'] + df['DCI'] + df['BPI']) / df['Market_Value']

# ---------------------------------------------------------
# 5. TEAM FIT SCORE (TFS)
# ---------------------------------------------------------
fit_features = ['OOI', 'DCI', 'BPI', 'SEI', 'PEI', 'Aerial_Efficiency']
df_fit = df[fit_features].fillna(0)

scaler = StandardScaler()
df_scaled = scaler.fit_transform(df_fit)
df_scaled = pd.DataFrame(df_scaled, columns=fit_features, index=df.index)

df_scaled['Team'] = df['Team']
df_scaled['Year'] = df['Season_End_Year']

team_vectors = df_scaled.groupby(['Team', 'Year'])[fit_features].mean()

def calculate_tfs(row):
    try:
        p_vec = row[fit_features].values.reshape(1, -1)
        t_vec = team_vectors.loc[(row['Team'], row['Year'])].values.reshape(1, -1)
        return cosine_similarity(p_vec, t_vec)[0][0]
    except:
        return 0.0

df_scaled_with_meta = df_scaled.copy()
df['TFS'] = df_scaled_with_meta.apply(calculate_tfs, axis=1)

# ---------------------------------------------------------
# 6. SAVE
# ---------------------------------------------------------
print("Composites Calculated Successfully.")
df.to_csv('players_with_composites.csv', index=False)
print(df[['Player', 'Team', 'TFS', 'MVE', 'OOI', 'DCI', 'PSE', 'CS']].head())

Composites Calculated Successfully.
                  Player      Team       TFS       MVE       OOI       DCI  \
0        aaron anselmino   chelsea -0.006742  0.000000  0.000000  0.000000   
1  aaron ciammaglichella    torino  0.225974  0.000000  0.000000  0.000000   
2  aaron ciammaglichella    torino  0.137647  0.000000  0.000000  0.000000   
3  aaron ciammaglichella    torino  0.986682  0.000000  0.000000  0.000000   
4        aaron cresswell  west ham -0.259696  1.341483  0.212934  4.883281   

        PSE        CS  
0  0.000000  1.000000  
1  0.000000  1.000000  
2  0.000000  1.000000  
3  0.000000  1.000000  
4  0.790123  0.691413  
