In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read the Wyscout export with one row per player, indexed by player name
df = pd.read_excel(
    r'/Users/marclambertes/Downloads/Wyscout/Premier League 2023-2024.xlsx',
    index_col='Player',
)
# Keep only players with at least 500 minutes played
df = df.loc[df['Minutes played'] >= 500]

# Optional filters, disabled by default (uncomment to apply):
#df = df[df['League'] == 'Eredivisie']
#df = df[(df['League'] != 'Belarus') & (df['League'] != 'Russian Super League') & (df['League'] != 'Ukraine')]
#df = df[(df['Team'] != 'Chelsea')]


# Columns removed before the numeric feature columns are selected
cols_to_drop = [
    'Minutes played', 'Matches played', 'Birth country',
    'Passport country', 'Height', 'Weight',
]
df = df.drop(cols_to_drop, axis=1)

# Replace missing values with 0 (applies to every remaining column)
if df.isna().to_numpy().any():
    df = df.fillna(0)

# Feature columns are those stored as int64 or float64; keep them together
# with the 'Team', 'Team within selected timeframe' and 'Position' columns
num_cols = [name for name, dtype in df.dtypes.items() if dtype in ('int64', 'float64')]
df = df[['Team', 'Team within selected timeframe', 'Position', *num_cols]]

# Standardise every feature column to z-scores
scaler = StandardScaler().fit(df[num_cols])
df[num_cols] = scaler.transform(df[num_cols])

# Pairwise cosine similarity between all player rows (row order follows df)
cosine_sim = cosine_similarity(df[num_cols])

# Function to get most similar players
def get_similar_players(player_name, top_n=300):
    """Return the players most similar to ``player_name``, best match first.

    Similarity scores are read from the module-level ``cosine_sim`` matrix;
    the row for ``player_name`` is located by its position in ``df.index``.

    Args:
        player_name: Label of the reference player in ``df.index``.
        top_n: Maximum number of players to return. Values below 1 yield an
            empty list.

    Returns:
        A list of ``(player, team, position, score)`` tuples sorted by
        descending similarity score. The reference player is never included.

    Raises:
        KeyError: If ``player_name`` is not present in ``df.index``.
        ValueError: If ``player_name`` matches more than one row.
    """
    import numbers

    index = df.index.get_loc(player_name)
    # For a non-unique label get_loc returns a slice or boolean mask instead
    # of a single position, which cannot select one reference row.
    if not isinstance(index, numbers.Integral):
        raise ValueError(
            f"Player name {player_name!r} matches more than one row; "
            "cannot pick a single reference player."
        )

    # Exclude the reference player by row position rather than assuming it
    # sorts first: a tie with an identical row, or a self-similarity of 0 for
    # an all-zero feature row, would otherwise drop the wrong entry.
    candidates = [
        (i, score) for i, score in enumerate(cosine_sim[index]) if i != index
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    top_scores = candidates[:max(top_n, 0)]

    # Look the two label columns up once instead of materialising a full row
    # per result.
    teams = df['Team within selected timeframe']
    positions = df['Position']
    return [
        (df.index[i], teams.iloc[i], positions.iloc[i], score)
        for i, score in top_scores
    ]

# Input player name; stop with a readable message instead of a KeyError
# traceback when the name is not in the data set
player_name = input("Enter a player name: ")
if player_name not in df.index:
    raise SystemExit(f"Player '{player_name}' was not found in the data set.")
similar_players = get_similar_players(player_name)

# Normalize similarity scores so the best match is 100%.
# default=0 covers an empty result; rescaling is only meaningful when the best
# score is positive (a zero maximum would divide by zero and a negative one
# would invert the ranking), so otherwise show raw cosine values as percentages.
max_similarity = max((score for *_, score in similar_players), default=0)
if max_similarity > 0:
    similar_players = [(player, team, position, score / max_similarity * 100)
                       for player, team, position, score in similar_players]
else:
    print("No positively similar players found; scores are raw cosine similarities in percent.")
    similar_players = [(player, team, position, score * 100)
                       for player, team, position, score in similar_players]

# Print similar players
print(f"Players similar to {player_name}:")
for player, team, position, score in similar_players:
    print(f"Player: {player}\nTeam: {team}\nPosition: {position}\nSimilarity Score: {score}%\n")

# Write similar players to Excel file
output_filename = r'/Users/marclambertes/Python/similar players.xlsx'
similar_players_df = pd.DataFrame(similar_players, columns=['Player', 'Team', 'Position', 'Similarity Score'])
similar_players_df.to_excel(output_filename, index=False)
print(f"Similar players saved to {output_filename}")
