In [1]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the full player table from disk.
df = pd.read_csv('all_players.csv')

# Attributes used to compare goalkeepers: the five GK-specific ratings plus
# 'Reactions', which is treated here as a key goalkeeper stat as well.
gk_similarity_columns = [
    'GK Diving',
    'GK Handling',
    'GK Kicking',
    'GK Positioning',
    'GK Reflexes',
    'Reactions',
]

# Keep only rows whose Position is 'GK'. The .copy() gives an independent
# frame, so later changes to df_gk_sim never touch df.
is_goalkeeper = df['Position'] == 'GK'
df_gk_sim = df.loc[is_goalkeeper].copy()

In [3]:
# Drop goalkeepers that are missing any of the similarity attributes.
# Reassigning (rather than inplace=True) keeps the same variable name for
# later cells while avoiding in-place mutation of a frame built in an earlier
# cell; the resulting rows are the same.
df_gk_sim = df_gk_sim.dropna(subset=gk_similarity_columns)

# Identity columns kept for later lookup. The index is reset so that row i of
# this table is simply the i-th remaining goalkeeper (positional 0..n-1).
gk_player_info = df_gk_sim[['Name', 'OVR', 'Position', 'Team']].reset_index(drop=True)

In [4]:
# Feature matrix: one row per remaining goalkeeper, one column per
# similarity attribute.
X_gk_similarity = df_gk_sim.loc[:, gk_similarity_columns]

# Standardise every attribute so that all of them carry equal weight.
# The scaler is fitted first and then applied to the same data.
gk_similarity_scaler = StandardScaler().fit(X_gk_similarity)
scaled_gk_attributes = gk_similarity_scaler.transform(X_gk_similarity)

In [5]:
# joblib.dump, np.save and DataFrame.to_csv do not create missing parent
# directories, so make sure 'models/' exists first (no-op if it already does).
os.makedirs('models', exist_ok=True)

# The scaled matrix and the lookup table are matched by row position, so they
# must have the same number of rows. A mismatch means cells were run out of
# order; fail here rather than save artefacts that disagree.
assert len(gk_player_info) == scaled_gk_attributes.shape[0], (
    "gk_player_info and scaled_gk_attributes have different row counts"
)

# Persist the fitted scaler, the scaled attribute matrix and the player
# lookup table.
joblib.dump(gk_similarity_scaler, 'models/gk_similarity_scaler.pkl')
np.save('models/scaled_gk_attributes.npy', scaled_gk_attributes)
gk_player_info.to_csv('models/gk_player_info.csv', index=False)

print("Goalkeeper similarity components have been created and saved successfully!")

Goalkeeper similarity components have been created and saved successfully!
