In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import joblib
from sklearn.neighbors import NearestNeighbors

In [2]:
df = pd.read_csv("../raw_data/clean_data.csv")

In [3]:
df.columns

Index(['name', 'club', 'nat', 'position', 'dob', 'age', 'height', 'weight',
       'wage', 'last trans. fee', 'value', 'agg', 'jum', 'pun', 'vis', 'l th',
       'lon', 'otb', 'tck', 'tec', 'tea', 'cmp', 'fre', 'ref', 'pos', 'pen',
       'pas', 'fla', 'ant', 'cro', 'mar', 'ldr', 'cor', 'cnt', 'det', 'dec',
       'hea', 'fir', 'com', 'acc', 'pac', 'aer', 'str', 'thr', 'han', 'ecc',
       'dri', 'bal', 'kic', 'sta', 'agi', 'wor', 'bra', 'cmd', 'fin', '1v1',
       'tro', 'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
       'winger', 'division_rating', 'nat_rating', 'club_rating', 'either_left',
       'either_right', 'left', 'right'],
      dtype='object')

In [4]:
# Step 1: Drop rows where the club is equal to 'Other'
df = df[df['club'] != 'Other']

#  create fictive player

## create new dataframes for each position grouped by teams

In [5]:
def filter_position(df, club, position, max_count):
    filtered_df = df[(df['club'] == club) & (df[position] == 1)]
    return filtered_df.head(max_count)

def avg_position(df, club, position, max_count):
    filtered_df = filter_position(df, club, position, max_count)
    return filtered_df.groupby('club').mean()

def filter_and_append(avg_position_df, df, club, position, max_count):
    filtered_df = avg_position(df, club, position, max_count)
    return pd.concat([avg_position_df, filtered_df])

goalkeepers = pd.DataFrame()
centerbacks = pd.DataFrame()
fullbacks = pd.DataFrame()
midfielders = pd.DataFrame()
wingers = pd.DataFrame()
strikers = pd.DataFrame()

for club in df['club'].unique():
    goalkeepers = filter_and_append(goalkeepers, df, club, 'goalkeeper', 2)
    centerbacks = filter_and_append(centerbacks, df, club, 'centerback', 3)
    fullbacks = filter_and_append(fullbacks, df, club, 'fullback', 4)
    midfielders = filter_and_append(midfielders, df, club, 'midfielder', 5)
    wingers = filter_and_append(wingers, df, club, 'winger', 4)
    strikers = filter_and_append(strikers, df, club, 'striker', 2)

## grouped features

### definition

In [6]:
def create_grouped_features(df):
    # Offensive Skills
    df['shooting'] = df[['fin', 'lon', 'fre', 'pen']].mean(axis=1)
    df['dribbling_control'] = df[['dri', 'fir', 'fla', 'tec']].mean(axis=1)
    df['passing_vision'] = df[['pas', 'vis', 'l th', 'cro', 'cor', 'otb']].mean(axis=1)
    
    # Defensive Skills
    df['tackling_interception'] = df[['tck', 'mar', 'pos', 'ant']].mean(axis=1)
    df['aerial_defense'] = df[['hea', 'jum', 'aer']].mean(axis=1)
    
    # Physical Attributes
    df['speed_agility'] = df[['acc', 'pac', 'agi']].mean(axis=1)
    df['strength_stamina'] = df[['str', 'sta', 'bal']].mean(axis=1)
    
    # Mental and Tactical Attributes
    df['decision_making'] = df[['dec', 'cmp']].mean(axis=1)
    df['work_ethic_effort'] = df[['wor', 'det', 'bra']].mean(axis=1)
    
    # Leadership and Teamwork
    df['leadership'] = df[['ldr', 'com']].mean(axis=1)
    df['teamwork'] = df['tea']
    
    # Goalkeeping Abilities
    df['goalkeeping_abilities'] = df[['han', 'ref', 'kic', 'thr', '1v1', 'ecc', 'cmd']].mean(axis=1)
    
    # Behavioral Attributes
    df['behavioral_attributes'] = df[['agg', 'pun']].mean(axis=1)
    
    # Player Traits
    df['player_traits'] = df['tro']
    
    # Create a new DataFrame with the grouped features
    dfgf = df[['shooting', 'dribbling_control', 'passing_vision', 'tackling_interception', 
               'aerial_defense', 'speed_agility', 'strength_stamina', 'decision_making', 
               'work_ethic_effort', 'leadership', 'teamwork', 'goalkeeping_abilities', 
               'behavioral_attributes', 'player_traits', 'club_rating']]
    
    return dfgf

### positions dataframe

In [7]:
goalkeepers = create_grouped_features(goalkeepers)
centerbacks = create_grouped_features(centerbacks)
fullbacks = create_grouped_features(fullbacks)
midfielders = create_grouped_features(midfielders)
wingers = create_grouped_features(wingers)
strikers = create_grouped_features(strikers)

In [13]:
goalkeepers.index[:5]

Index(['Man City', 'Tottenham', 'Liverpool', 'Man Utd', 'Aston Villa'], dtype='object', name='club')

In [11]:
midfielders[midfielders.index == 'Club Brugge']

Unnamed: 0_level_0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits,club_rating
club,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Club Brugge,10.45,12.0,10.933333,9.35,7.733333,11.466667,11.333333,12.5,13.466667,6.8,14.6,1.971429,8.2,2.2,1657.0


In [10]:
midfielders.loc['Club Brugge']

shooting                   10.450000
dribbling_control          12.000000
passing_vision             10.933333
tackling_interception       9.350000
aerial_defense              7.733333
speed_agility              11.466667
strength_stamina           11.333333
decision_making            12.500000
work_ethic_effort          13.466667
leadership                  6.800000
teamwork                   14.600000
goalkeeping_abilities       1.971429
behavioral_attributes       8.200000
player_traits               2.200000
club_rating              1657.000000
Name: Club Brugge, dtype: float64

In [9]:
centerbacks.to_csv('../raw_data/centerback.csv', index=True)
fullbacks.to_csv('../raw_data/fullback.csv', index=True)
midfielders.to_csv('../raw_data/midfielder.csv', index=True)
wingers.to_csv('../raw_data/winger.csv', index=True)
strikers.to_csv('../raw_data/striker.csv', index=True)

def custom_scaler(dfgf_no_name):
    total_score = dfgf_no_name.sum(axis=1)
    scaled_dfgf_no_name = dfgf_no_name.div(total_score, axis=0)
    total_score = scaler.transform(total_score.values.reshape(-1, 1)).flatten()
    return total_score, scaled_dfgf_no_name

def compare_teams(df, team1, team2):
    df1 = df.loc[team1]
    df2 = df.loc[team2]
    total_score, result = custom_scaler(pd.DataFrame(df1.subtract(df2)).T.drop(columns=['club_rating']))
    result = result.clip(lower=0)
    result['scaled_total_score'] = abs(total_score)
    name = pd.DataFrame(data=["Nino_M"], columns=['name'])
    result = name.join(result)
    return result

### example short df

Nino = compare_teams(midfielders, 'Man City', 'Club Brugge')

Nino

### extend fictive player

#### foots

 Specify the dimensions of the DataFrame
rows = 4
columns = 4

 Create a DataFrame filled with zeros
foots = pd.DataFrame(np.zeros((rows, columns)), columns=[f'col{i+1}' for i in range(columns)])
new_column_names = ['either_left', 'either_right', 'left', 'right']
foots.columns = new_column_names
 Set the diagonal elements to 1
np.fill_diagonal(foots.values, 1)
foots

merged_foots = pd.DataFrame()
merged_foots = pd.concat([foots, foots, foots, foots, foots, foots])
merged_foots.index = [i // 4 for i in range(24)]
merged_foots

#### positions

 Specify the dimensions of the DataFrame
rows = 6
columns = 6

 Create a DataFrame filled with zeros
positions = pd.DataFrame(np.zeros((rows, columns)), columns=[f'col{i+1}' for i in range(columns)])
new_column_names = ['centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker', 'winger']
positions.columns = new_column_names
 Set the diagonal elements to 1
np.fill_diagonal(positions.values, 1)
positions

final_df = positions.join(foots, how='cross')

final_df

#### merging

final_df = positions.join(merged_foots)
Nino = Nino.join(final_df, how='cross')

series = pd.Series([f'Nino_M_{i}' for i in range(24)])

Nino.name = series

Nino

Nino = Nino[Nino.goalkeeper != 1]
Nino.drop(columns=['goalkeeper', 'goalkeeping_abilities'], inplace=True)

Nino.reset_index(inplace=True, drop=True)

Nino

Nino_name = Nino.name
Nino_name

Nino.drop(columns=['name'], inplace=True)

Nino

# Part 2 Implementation fictive player in Model 

## df creation

### Custom features

In [10]:
df = pd.read_csv("../raw_data/clean_data.csv")

In [11]:
# Create a new dataset with the numerical features for the followings steps.
columns_to_remove = ['club', 'nat', 'position', 'dob', 'age', 'height', 'weight',
       'wage', 'last trans. fee', 'value', 'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
       'winger', 'division_rating', 'nat_rating', 'club_rating', 'either_left',
       'either_right', 'left', 'right']

In [12]:
# Remove specified columns
df.drop(columns=columns_to_remove, inplace=True)

In [13]:
# Grouping and creating new features by calculating the mean of each group

# Offensive Skills
df['shooting'] = df[['fin', 'lon', 'fre', 'pen']].mean(axis=1)
df['dribbling_control'] = df[['dri', 'fir', 'fla', 'tec']].mean(axis=1)
df['passing_vision'] = df[['pas', 'vis', 'l th', 'cro', 'cor', 'otb']].mean(axis=1)

# Defensive Skills
df['tackling_interception'] = df[['tck', 'mar', 'pos', 'ant']].mean(axis=1)
df['aerial_defense'] = df[['hea', 'jum', 'aer']].mean(axis=1)

# Physical Attributes
df['speed_agility'] = df[['acc', 'pac', 'agi']].mean(axis=1)
df['strength_stamina'] = df[['str', 'sta', 'bal']].mean(axis=1)

# Mental and Tactical Attributes
df['decision_making'] = df[['dec', 'cmp']].mean(axis=1)
df['work_ethic_effort'] = df[['wor', 'det', 'bra']].mean(axis=1)

# Leadership and Teamwork
df['leadership'] = df[['ldr', 'com']].mean(axis=1)
df['teamwork'] = df['tea']

# Goalkeeping Abilities
df['goalkeeping_abilities'] = df[['han', 'ref', 'kic', 'thr', '1v1', 'ecc', 'cmd']].mean(axis=1)

# Behavioral Attributes
df['behavioral_attributes'] = df[['agg', 'pun']].mean(axis=1)


# Player Traits
df['player_traits'] = df['tro']

#'dfgf' is the new DataFrame = DataFrame grouped features
dfgf = df[['name','shooting', 'dribbling_control', 'passing_vision', 'tackling_interception', 'aerial_defense', 'speed_agility', 'strength_stamina', 'decision_making', 'work_ethic_effort', 'leadership', 'teamwork', 'goalkeeping_abilities', 'behavioral_attributes', 'player_traits']]

In [14]:
dfgf

Unnamed: 0,name,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits
0,Kevin De Bruyne,16.25,16.25,15.833333,9.50,5.666667,14.000000,15.000000,16.5,14.000000,7.0,14.0,1.714286,6.5,3.0
1,Harry Kane,16.75,13.75,13.833333,12.00,10.333333,12.333333,14.666667,17.0,17.666667,8.0,18.0,2.571429,6.5,3.0
2,Mohamed Salah,14.50,15.50,13.833333,9.75,6.666667,17.333333,15.666667,14.0,14.000000,6.5,14.0,2.142857,6.0,2.0
3,Sadio Mané,11.50,16.50,11.833333,11.25,9.000000,17.333333,14.000000,15.0,13.666667,4.5,15.0,2.142857,6.5,4.0
4,Son Heung-Min,13.25,14.50,12.333333,8.50,7.666667,15.333333,12.666667,15.0,12.000000,8.0,15.0,2.000000,5.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,Nick Brown,7.75,10.00,6.000000,4.00,5.333333,11.333333,9.333333,9.5,10.666667,3.5,10.0,1.714286,8.0,3.0
174320,Luis Alcalá,6.00,10.50,5.666667,3.50,4.333333,9.333333,8.000000,8.0,7.000000,2.0,8.0,2.428571,8.5,3.0
174321,Devin Lee,4.50,5.50,6.333333,9.25,8.666667,7.666667,11.666667,8.5,14.000000,2.0,13.0,1.714286,7.5,1.0
174322,Yeferson Suárez,6.25,8.50,6.833333,6.50,3.000000,7.333333,6.000000,12.0,8.333333,8.0,16.0,2.142857,8.0,1.0


In [15]:
dfgf_no_name = dfgf.iloc[:,1:]

### Custom scaler on the custom features

In [16]:
dfgf_no_name.sum(axis=1)

0         155.214286
1         166.404762
2         151.892857
3         152.226190
4         142.250000
             ...    
174319    100.130952
174320     86.261905
174321    101.297619
174322     99.892857
174323     90.357143
Length: 174324, dtype: float64

In [17]:
def custom_scaler(dfgf_no_name):
    total_score = dfgf_no_name.sum(axis=1)

    scaled_dfgf_no_name = dfgf_no_name.div(total_score, axis=0)

    return scaled_dfgf_no_name

In [18]:
scaled_df = custom_scaler(dfgf_no_name)
scaled_df

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits
0,0.104694,0.104694,0.102010,0.061206,0.036509,0.090198,0.096641,0.106305,0.090198,0.045099,0.090198,0.011045,0.041878,0.019328
1,0.100658,0.082630,0.083131,0.072113,0.062098,0.074116,0.088139,0.102161,0.106167,0.048076,0.108170,0.015453,0.039061,0.018028
2,0.095462,0.102046,0.091073,0.064190,0.043891,0.114116,0.103143,0.092170,0.092170,0.042793,0.092170,0.014108,0.039502,0.013167
3,0.075545,0.108391,0.077735,0.073903,0.059123,0.113866,0.091968,0.098538,0.089779,0.029561,0.098538,0.014077,0.042700,0.026277
4,0.093146,0.101933,0.086702,0.059754,0.053896,0.107791,0.089045,0.105448,0.084359,0.056239,0.105448,0.014060,0.035149,0.007030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,0.077399,0.099869,0.059922,0.039948,0.053264,0.113185,0.093211,0.094876,0.106527,0.034954,0.099869,0.017120,0.079895,0.029961
174320,0.069556,0.121722,0.065691,0.040574,0.050235,0.108198,0.092741,0.092741,0.081148,0.023185,0.092741,0.028153,0.098537,0.034778
174321,0.044424,0.054295,0.062522,0.091315,0.085556,0.075685,0.115172,0.083911,0.138207,0.019744,0.128335,0.016923,0.074039,0.009872
174322,0.062567,0.085091,0.068407,0.065070,0.030032,0.073412,0.060064,0.120129,0.083423,0.080086,0.160172,0.021452,0.080086,0.010011


In [19]:
scaled_df

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits
0,0.104694,0.104694,0.102010,0.061206,0.036509,0.090198,0.096641,0.106305,0.090198,0.045099,0.090198,0.011045,0.041878,0.019328
1,0.100658,0.082630,0.083131,0.072113,0.062098,0.074116,0.088139,0.102161,0.106167,0.048076,0.108170,0.015453,0.039061,0.018028
2,0.095462,0.102046,0.091073,0.064190,0.043891,0.114116,0.103143,0.092170,0.092170,0.042793,0.092170,0.014108,0.039502,0.013167
3,0.075545,0.108391,0.077735,0.073903,0.059123,0.113866,0.091968,0.098538,0.089779,0.029561,0.098538,0.014077,0.042700,0.026277
4,0.093146,0.101933,0.086702,0.059754,0.053896,0.107791,0.089045,0.105448,0.084359,0.056239,0.105448,0.014060,0.035149,0.007030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,0.077399,0.099869,0.059922,0.039948,0.053264,0.113185,0.093211,0.094876,0.106527,0.034954,0.099869,0.017120,0.079895,0.029961
174320,0.069556,0.121722,0.065691,0.040574,0.050235,0.108198,0.092741,0.092741,0.081148,0.023185,0.092741,0.028153,0.098537,0.034778
174321,0.044424,0.054295,0.062522,0.091315,0.085556,0.075685,0.115172,0.083911,0.138207,0.019744,0.128335,0.016923,0.074039,0.009872
174322,0.062567,0.085091,0.068407,0.065070,0.030032,0.073412,0.060064,0.120129,0.083423,0.080086,0.160172,0.021452,0.080086,0.010011


In [20]:
# Adding the player Names to the DataFrame scaled_df + The total score min-max scaled 
scaled_df_name = scaled_df.copy()
scaled_df_name['name'] = dfgf['name']
scaled_df_name.drop(columns = 'goalkeeping_abilities', inplace = True)

### Player scoring 

In [21]:
# Adding the total score min-max scaled to the DataFrame
# Scale the total score (reshape is required as MinMaxScaler expects 2D input)
from sklearn.preprocessing import MinMaxScaler

def custom_scaler(dfgf_no_name):
    total_score = dfgf_no_name.sum(axis=1)
    return total_score

total_score = custom_scaler(dfgf_no_name)

scaler = MinMaxScaler()

scaled_total_score = scaler.fit_transform(total_score.values.reshape(-1, 1)).flatten()

scaled_df['scaled_total_score'] = scaled_total_score


scaled_df

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits,scaled_total_score
0,0.104694,0.104694,0.102010,0.061206,0.036509,0.090198,0.096641,0.106305,0.090198,0.045099,0.090198,0.011045,0.041878,0.019328,0.823356
1,0.100658,0.082630,0.083131,0.072113,0.062098,0.074116,0.088139,0.102161,0.106167,0.048076,0.108170,0.015453,0.039061,0.018028,0.907472
2,0.095462,0.102046,0.091073,0.064190,0.043891,0.114116,0.103143,0.092170,0.092170,0.042793,0.092170,0.014108,0.039502,0.013167,0.798389
3,0.075545,0.108391,0.077735,0.073903,0.059123,0.113866,0.091968,0.098538,0.089779,0.029561,0.098538,0.014077,0.042700,0.026277,0.800895
4,0.093146,0.101933,0.086702,0.059754,0.053896,0.107791,0.089045,0.105448,0.084359,0.056239,0.105448,0.014060,0.035149,0.007030,0.725906
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,0.077399,0.099869,0.059922,0.039948,0.053264,0.113185,0.093211,0.094876,0.106527,0.034954,0.099869,0.017120,0.079895,0.029961,0.409306
174320,0.069556,0.121722,0.065691,0.040574,0.050235,0.108198,0.092741,0.092741,0.081148,0.023185,0.092741,0.028153,0.098537,0.034778,0.305056
174321,0.044424,0.054295,0.062522,0.091315,0.085556,0.075685,0.115172,0.083911,0.138207,0.019744,0.128335,0.016923,0.074039,0.009872,0.418076
174322,0.062567,0.085091,0.068407,0.065070,0.030032,0.073412,0.060064,0.120129,0.083423,0.080086,0.160172,0.021452,0.080086,0.010011,0.407517


In [22]:
#scaled_df_label_score = scaled_df_label_score.rename(columns={'Name': 'name'})
scaled_df['name'] = dfgf['name']

In [23]:
scaled_df

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits,scaled_total_score,name
0,0.104694,0.104694,0.102010,0.061206,0.036509,0.090198,0.096641,0.106305,0.090198,0.045099,0.090198,0.011045,0.041878,0.019328,0.823356,Kevin De Bruyne
1,0.100658,0.082630,0.083131,0.072113,0.062098,0.074116,0.088139,0.102161,0.106167,0.048076,0.108170,0.015453,0.039061,0.018028,0.907472,Harry Kane
2,0.095462,0.102046,0.091073,0.064190,0.043891,0.114116,0.103143,0.092170,0.092170,0.042793,0.092170,0.014108,0.039502,0.013167,0.798389,Mohamed Salah
3,0.075545,0.108391,0.077735,0.073903,0.059123,0.113866,0.091968,0.098538,0.089779,0.029561,0.098538,0.014077,0.042700,0.026277,0.800895,Sadio Mané
4,0.093146,0.101933,0.086702,0.059754,0.053896,0.107791,0.089045,0.105448,0.084359,0.056239,0.105448,0.014060,0.035149,0.007030,0.725906,Son Heung-Min
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,0.077399,0.099869,0.059922,0.039948,0.053264,0.113185,0.093211,0.094876,0.106527,0.034954,0.099869,0.017120,0.079895,0.029961,0.409306,Nick Brown
174320,0.069556,0.121722,0.065691,0.040574,0.050235,0.108198,0.092741,0.092741,0.081148,0.023185,0.092741,0.028153,0.098537,0.034778,0.305056,Luis Alcalá
174321,0.044424,0.054295,0.062522,0.091315,0.085556,0.075685,0.115172,0.083911,0.138207,0.019744,0.128335,0.016923,0.074039,0.009872,0.418076,Devin Lee
174322,0.062567,0.085091,0.068407,0.065070,0.030032,0.073412,0.060064,0.120129,0.083423,0.080086,0.160172,0.021452,0.080086,0.010011,0.407517,Yeferson Suárez


### df with positions and foots¶

In [24]:
df = pd.read_csv("../raw_data/clean_data.csv")

In [25]:
dfpf = df.copy()

In [26]:

# Create a new dataset with the numerical features for the followings steps.

columns_to_remove = ['nat', 'position', 'dob', 'age', 'height', 'weight',
       'wage', 'last trans. fee', 'value', 'agg', 'jum', 'pun', 'vis', 'l th',
       'lon', 'otb', 'tck', 'tec', 'tea', 'cmp', 'fre', 'ref', 'pos', 'pen',
       'pas', 'fla', 'ant', 'cro', 'mar', 'ldr', 'cor', 'cnt', 'det', 'dec',
       'hea', 'fir', 'com', 'acc', 'pac', 'aer', 'str', 'thr', 'han', 'ecc',
       'dri', 'bal', 'kic', 'sta', 'agi', 'wor', 'bra', 'cmd', 'fin', '1v1',
       'tro', 'division_rating', 'nat_rating', 'club_rating']

# Remove specified columns
dfpf.drop(columns=columns_to_remove, inplace=True)

In [27]:
#dfpf.head()

In [28]:
#dfgf.info()

###  Global df = scaled_df (grouped scaled features)+ dfpf (position & foots)

In [29]:
dfpf.drop(columns=['name', 'club'])

Unnamed: 0,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right
0,0,0,0,1,0,0,0,1,0,0
1,0,0,0,0,1,0,1,0,0,0
2,0,0,0,0,0,1,0,0,1,0
3,0,0,0,0,0,1,0,0,0,1
4,0,0,0,0,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...
174319,0,0,0,0,1,0,0,0,0,1
174320,0,0,0,0,1,0,0,0,0,1
174321,1,0,0,0,0,0,0,0,0,1
174322,0,0,0,1,0,0,0,0,0,1


In [30]:
scaled_df['centerback'] = dfpf['centerback']
scaled_df['fullback'] = dfpf['fullback']
scaled_df['goalkeeper'] = dfpf['goalkeeper']
scaled_df['midfielder'] = dfpf['midfielder']
scaled_df['striker'] = dfpf['striker']
scaled_df['winger'] = dfpf['winger']
scaled_df['either_left'] = dfpf['either_left']
scaled_df['either_right'] = dfpf['either_right']
scaled_df['left'] = dfpf['left']
scaled_df['right'] = dfpf['right']

In [31]:
scaled_df.columns

Index(['shooting', 'dribbling_control', 'passing_vision',
       'tackling_interception', 'aerial_defense', 'speed_agility',
       'strength_stamina', 'decision_making', 'work_ethic_effort',
       'leadership', 'teamwork', 'goalkeeping_abilities',
       'behavioral_attributes', 'player_traits', 'scaled_total_score', 'name',
       'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
       'winger', 'either_left', 'either_right', 'left', 'right'],
      dtype='object')

In [32]:
scaled_df.head(5)

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,...,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right
0,0.104694,0.104694,0.10201,0.061206,0.036509,0.090198,0.096641,0.106305,0.090198,0.045099,...,0,0,0,1,0,0,0,1,0,0
1,0.100658,0.08263,0.083131,0.072113,0.062098,0.074116,0.088139,0.102161,0.106167,0.048076,...,0,0,0,0,1,0,1,0,0,0
2,0.095462,0.102046,0.091073,0.06419,0.043891,0.114116,0.103143,0.09217,0.09217,0.042793,...,0,0,0,0,0,1,0,0,1,0
3,0.075545,0.108391,0.077735,0.073903,0.059123,0.113866,0.091968,0.098538,0.089779,0.029561,...,0,0,0,0,0,1,0,0,0,1
4,0.093146,0.101933,0.086702,0.059754,0.053896,0.107791,0.089045,0.105448,0.084359,0.056239,...,0,0,0,0,0,1,1,0,0,0


## add fictive player

In [33]:
scaled_df

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,...,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right
0,0.104694,0.104694,0.102010,0.061206,0.036509,0.090198,0.096641,0.106305,0.090198,0.045099,...,0,0,0,1,0,0,0,1,0,0
1,0.100658,0.082630,0.083131,0.072113,0.062098,0.074116,0.088139,0.102161,0.106167,0.048076,...,0,0,0,0,1,0,1,0,0,0
2,0.095462,0.102046,0.091073,0.064190,0.043891,0.114116,0.103143,0.092170,0.092170,0.042793,...,0,0,0,0,0,1,0,0,1,0
3,0.075545,0.108391,0.077735,0.073903,0.059123,0.113866,0.091968,0.098538,0.089779,0.029561,...,0,0,0,0,0,1,0,0,0,1
4,0.093146,0.101933,0.086702,0.059754,0.053896,0.107791,0.089045,0.105448,0.084359,0.056239,...,0,0,0,0,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,0.077399,0.099869,0.059922,0.039948,0.053264,0.113185,0.093211,0.094876,0.106527,0.034954,...,0,0,0,0,1,0,0,0,0,1
174320,0.069556,0.121722,0.065691,0.040574,0.050235,0.108198,0.092741,0.092741,0.081148,0.023185,...,0,0,0,0,1,0,0,0,0,1
174321,0.044424,0.054295,0.062522,0.091315,0.085556,0.075685,0.115172,0.083911,0.138207,0.019744,...,1,0,0,0,0,0,0,0,0,1
174322,0.062567,0.085091,0.068407,0.065070,0.030032,0.073412,0.060064,0.120129,0.083423,0.080086,...,0,0,0,1,0,0,0,0,0,1


## K-mean

In [34]:
scaled_df_kmeans = scaled_df.copy()

In [35]:
scaled_df_kmeans.rename(str.strip, axis='columns', inplace = True)

In [36]:
scaled_df_kmeans = scaled_df_kmeans[scaled_df_kmeans.goalkeeper != 1]

In [37]:
scaled_df_kmeans.drop(columns=['name', 'goalkeeper', 'goalkeeping_abilities'], inplace=True)

In [38]:
#scaled_df_kmeans.columns

In [39]:
km = KMeans(n_clusters=12)
km.fit(scaled_df_kmeans)

  super()._check_params_vs_input(X, default_n_init=10)


km.predict(Nino)

Nino['label'] = km.predict(Nino)

Nino

In [40]:
scaled_df_kmeans['name'] = scaled_df['name']
scaled_df_kmeans['label'] = km.labels_
scaled_df_kmeans[['name', 'label']].head(5)

Unnamed: 0,name,label
0,Kevin De Bruyne,11
1,Harry Kane,10
2,Mohamed Salah,8
3,Sadio Mané,3
4,Son Heung-Min,8


In [41]:
scaled_df_kmeans[scaled_df_kmeans['label']==10].sort_values(by='scaled_total_score', ascending=False)[['name', 'label']].head(50)

Unnamed: 0,name,label
1,Harry Kane,10
68539,Romelu Lukaku,10
68567,Mikel Oyarzábal,10
68559,Lionel Messi,10
68536,Antoine Griezmann,10
68990,Felipe Caicedo,10
68649,Dušan Tadić,10
68534,Erling Haaland,10
143114,Giovanni Moreno,10
69461,Simone Zaza,10


In [42]:
#scaled_df_kmeans[scaled_df_kmeans['name']=='Marc-André ter Stegen']

In [46]:
feature_columns = ['shooting', 'dribbling_control', 'passing_vision',
       'tackling_interception', 'aerial_defense', 'speed_agility',
       'strength_stamina', 'decision_making', 'work_ethic_effort',
       'leadership', 'teamwork', 'behavioral_attributes', 'player_traits',
       'scaled_total_score', 'centerback', 'fullback', 'midfielder', 'striker',
       'winger', 'either_left', 'either_right', 'left', 'right', 'label']

In [47]:
nbrs = NearestNeighbors(n_neighbors= 1000).fit(scaled_df_kmeans[feature_columns])

Nino['name'] = Nino_name

test_player = Nino[Nino['name']== 'Nino_M_12'].drop(columns='name')

test_player

distances, indices = nbrs.kneighbors(test_player)
similar_players_indices = indices.flatten()

similar_players = scaled_df_kmeans.iloc[similar_players_indices].sort_values(by='scaled_total_score',ascending=False)[['name', 'scaled_total_score']]
similar_players.head(30)

In [51]:
# Save the scaler to the specified directory
joblib.dump(scaler, '../raw_data/minmax_scaler.joblib')

# Later, you can load the scaler back
# loaded_scaler = joblib.load('../raw_data/minmax_scaler.joblib')

# Save the KNN model to the specified directory
joblib.dump(km, '../raw_data/knn_model.joblib')

# Later, you can load the KNN model back
# loaded_km = joblib.load('../raw_data/knn_model.joblib')

# Save the models to the specified directory
joblib.dump(nbrs, '../raw_data/nearest_neighbors_model.joblib')

# Later, you can load the models back
# loaded_nbrs = joblib.load('../raw_data/nearest_neighbors_model.joblib')

['../raw_data/nearest_neighbors_model.joblib']

## KNN

In [None]:
feature_columns = ['shooting', 'dribbling_control', 'passing_vision',
       'tackling_interception', 'aerial_defense', 'speed_agility',
       'strength_stamina', 'decision_making', 'work_ethic_effort',
       'leadership', 'teamwork', 'behavioral_attributes', 'player_traits',
       'scaled_total_score', 'centerback', 'fullback', 'midfielder', 'striker',
       'winger', 'either_left', 'either_right', 'left', 'right', 'label']

In [None]:
def find_closest_players(position, team_1, team_2, num_neighbors):
    
    def custom_scaler(dfgf_no_name):
        total_score = dfgf_no_name.sum(axis=1)
        scaled_dfgf_no_name = dfgf_no_name.div(total_score, axis=0)
        total_score = scaler.transform(total_score.values.reshape(-1, 1)).flatten()
        return total_score, scaled_dfgf_no_name
    
    def compare_teams(df, team1, team2):
        df1 = df.loc[team1]
        df2 = df.loc[team2]
        total_score, result = custom_scaler(pd.DataFrame(df1.subtract(df2)).T.drop(columns=['club_rating']))
        result = result.clip(lower=0)
        result['scaled_total_score'] = abs(total_score)
        name = pd.DataFrame(data=["Nino_M"], columns=['name'])
        result = name.join(result)
        return result

    # Specify the dimensions of the DataFrame
    rows = 4
    columns = 4

    # Create a DataFrame filled with zeros
    foots = pd.DataFrame(np.zeros((rows, columns)), columns=[f'col{i+1}' for i in range(columns)])
    new_column_names = ['either_left', 'either_right', 'left', 'right']
    foots.columns = new_column_names
    # Set the diagonal elements to 1
    np.fill_diagonal(foots.values, 1)
    
    # Specify the dimensions of the DataFrame
    rows = 6
    columns = 6

    # Create a DataFrame filled with zeros
    positions = pd.DataFrame(np.zeros((rows, columns)), columns=[f'col{i+1}' for i in range(columns)])
    new_column_names = ['centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker', 'winger']
    positions.columns = new_column_names
    # Set the diagonal elements to 1
    np.fill_diagonal(positions.values, 1)
    
    final_df = positions.join(foots, how='cross')
    
    Nino = compare_teams(position, team_1, team_2)
    Nino = Nino.join(final_df, how='cross')
    series = pd.Series([f'Nino_M_{i}' for i in range(24)])
    Nino.name = series
    
    Nino = Nino[Nino.goalkeeper != 1]
    Nino.drop(columns=['goalkeeper', 'goalkeeping_abilities'], inplace=True)
    
    Nino.reset_index(inplace=True, drop=True)
    
    data = Nino

    #if player_name not in data['name'].values:
    #r    return f"Player '{player_name}' not found in the dataset."
    
    feature_columns = data.columns.drop('name')
    
    results = []

    for player_name in Nino.name:
        # Extract the specified player's statistics
        player_stats = data[data['name'] == player_name].drop(columns='name')
    
        player_stats['label'] = km.predict(player_stats)
    
        # Drop the 'label' column before fitting the model
        player_stats = player_stats.drop(columns='label')

        # Fit the NearestNeighbors model
        nbrs = NearestNeighbors(n_neighbors=num_neighbors + 1).fit(scaled_df_kmeans[feature_columns])

        # Find the nearest neighbors
        distances, indices = nbrs.kneighbors(player_stats)

        # Get the names of similar players
        # Exclude the first one if it's the player themselves
        similar_players_indices = indices.flatten()
        similar_players = scaled_df_kmeans.iloc[similar_players_indices].sort_values(by='scaled_total_score',ascending=False)[['name', 'scaled_total_score']]
        #by='scaled_total_score' bPREVIOUS SORTING
        results.append(similar_players)
        
    final_result = pd.concat(results).sort_values(by='scaled_total_score', ascending=False)
    return final_result

# Example usage
# Assuming 'merged_df_kmeans_NOGK' is your DataFrame and it's already scaled
# Replace 'player_name' with the name of the player you're interested in
# Replace 'num_neighbors' with the number of neighbors you want to find
# 'feature_columns' should be the list of columns used as features in KNN
# Adjust as needed

In [None]:
find_closest_players(midfielders, 'Man City', 'Club Brugge', 1000)

In [None]:
Nino

In [None]:
closest_players = find_closest_players('Nino_M_20', 1000, Nino)
closest_players

In [None]:
results = []

for name in Nino.name:
    closest_players_df = find_closest_players(name, 1000, Nino)
    results.append(closest_players_df)

# Concatenate and sort the results
final_result = pd.concat(results).sort_values(by='scaled_total_score', ascending=False)
final_result

## rest

In [None]:
scaled_df_kmeans['name'] = scaled_df['name']

In [None]:
from sklearn.neighbors import NearestNeighbors
import pandas as pd

def find_closest_players(player_name, num_neighbors, data, feature_columns):
    """
    Find the closest players based on the KNN model for a given player.

    Parameters:
    player_name (str): The name of the player to find neighbors for.
    num_neighbors (int): The number of closest neighbors to find.
    data (pd.DataFrame): The DataFrame containing player data.
    feature_columns (list): List of columns to use as features in KNN.

    Returns:
    list: A list of closest player names.
    """
    if player_name not in data['name'].values:
        return f"Player '{player_name}' not found in the dataset."
    
    # Extract the specified player's statistics
    player_stats = data[data['name'] == player_name][feature_columns].to_numpy()

    # Fit the NearestNeighbors model
    nbrs = NearestNeighbors(n_neighbors=num_neighbors + 1).fit(data[feature_columns])

    # Find the nearest neighbors
    distances, indices = nbrs.kneighbors(player_stats)

    # Get the names of similar players
    # Exclude the first one if it's the player themselves
    similar_players_indices = indices.flatten()
    if similar_players_indices[0] == data[data['name'] == player_name].index[0]:
        similar_players_indices = similar_players_indices[1:]
    else:
        similar_players_indices = similar_players_indices[:-1]
    
    similar_players = data.iloc[similar_players_indices].sort_values(by='scaled_total_score',ascending=False)[['name', 'scaled_total_score']]
  #by='scaled_total_score' bPREVIOUS SORTING  
    return similar_players

# Example usage
# Assuming 'merged_df_kmeans_NOGK' is your DataFrame and it's already scaled
# Replace 'player_name' with the name of the player you're interested in
# Replace 'num_neighbors' with the number of neighbors you want to find
# 'feature_columns' should be the list of columns used as features in KNN
player_name = 'Neymar'
num_neighbors = 10
feature_columns = scaled_df_kmeans.columns.drop('name')  # Adjust as needed

closest_players = find_closest_players(player_name, num_neighbors, scaled_df_kmeans, feature_columns)
print("Closest Players:", closest_players[1:])

In [None]:
feature_columns

## Plot 

In [None]:
import plotly.graph_objects as go
import pandas as pd
from sklearn.neighbors import NearestNeighbors

def find_closest_players(player_name, num_neighbors, data, feature_columns):
    if player_name not in data['name'].values:
        return f"Player '{player_name}' not found in the dataset."
    
    # Extract the specified player's statistics
    player_stats = data[data['name'] == player_name][feature_columns].to_numpy()
    
    # Fit the NearestNeighbors model
    nbrs = NearestNeighbors(n_neighbors=num_neighbors + 1).fit(data[feature_columns])
    
    # Find the nearest neighbors
    distances, indices = nbrs.kneighbors(player_stats)
    
    # Get the names of similar players
    similar_players_indices = indices.flatten()
    if similar_players_indices[0] == data[data['name'] == player_name].index[0]:
        similar_players_indices = similar_players_indices[1:]
    else:
        similar_players_indices = similar_players_indices[:-1]
    
    # Extract similar players data
    return data.iloc[similar_players_indices]

# Define the player name
player_name = 'Neymar'
num_neighbors = 7

# Use all columns except 'name' for finding neighbors
all_feature_columns = scaled_df_kmeans.columns.drop('name')

# Define the subset of features for the radar chart
radar_features = ['shooting', 'dribbling_control', 'passing_vision',
                  'tackling_interception', 'aerial_defense', 'speed_agility',
                  'strength_stamina', 'decision_making', 'work_ethic_effort',
                  'leadership', 'teamwork', 'behavioral_attributes', 'player_traits']

# Get closest players
closest_players = find_closest_players(player_name, num_neighbors, scaled_df_kmeans, all_feature_columns)

# Player's own stats for radar features
player_stats = scaled_df_kmeans[scaled_df_kmeans['name'] == player_name][radar_features].iloc[0]

# Create and display radar charts, skipping the first player
for i, (index, row) in enumerate(closest_players.iterrows()):
    if i == 0:  # Skip the first player's chart
        continue

    fig = go.Figure()

    # Add trace for the input player
    fig.add_trace(go.Scatterpolar(
        r=player_stats.values,
        theta=radar_features,
        fill='toself',
        name=player_name
    ))

    # Add trace for the closest player
    fig.add_trace(go.Scatterpolar(
        r=row[radar_features].values,
        theta=radar_features,
        fill='toself',
        name=row['name']
    ))

    # Update layout
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True
            )),
        showlegend=True,
        title=f"Comparison: {player_name} vs {row['name']}"
    )

    # Show the plot
    fig.show()


In [None]:
import streamlit as st
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import plotly.graph_objects as go
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import joblib
scaler = joblib.load('models/minmax_scaler.joblib')
km = joblib.load('models/knn_model.joblib')
nbrs = joblib.load('models/nearest_neighbors_model.joblib')
scaled_df_kmeans = pd.read_csv('raw_data/scaled_df_kmeans.csv')
st.title('Football Fictive Player Processing')
# Input Request
st.markdown("""
# Inputs:
## Player Characteristics
""")
# Define your game styles, positions, and age ranges
game_styles = ['Counter-Attacking Prowess', 'High-Pressing Havoc', 'Defensive Fortress',
               'Wing Dominance', 'Possession with Purpose', 'Youthful Energy and High Intensity',
               'Midfield Maestros']
# Mapping of positions to their corresponding CSV files
centerback_df = pd.read_csv('raw_data/centerbacks.csv')
fullback_df = pd.read_csv('raw_data/fullbacks.csv')
midfielder_df = pd.read_csv('raw_data/midfielders.csv')
winger_df = pd.read_csv('raw_data/wingers.csv')
striker_df = pd.read_csv('raw_data/strikers.csv')
position_to_file = {
    'centerback': centerback_df,
    'fullback': fullback_df,
    'midfielder': midfielder_df,
    'winger': winger_df,
    'striker': striker_df
}
ages_min = list(range(15, 41))
ages_max = list(range(15, 41))
# Streamlit input widgets
option0_club_name1 = st.text_input('Your club name:', 'Club Brugge')
option0_club_name2 = st.text_input('Club playing style :', 'Man City') # A supprimer plus tard
option1_game_style = st.selectbox('Select a game style', game_styles)
option2_selected_position = st.selectbox('Select a position', options=list(position_to_file.keys()))
option3_ages_min = st.selectbox('Select the player minimum age', ages_min)
option4_ages_max = st.selectbox('Select the player maximum age', ages_max)
option5_market_value = st.number_input('Select market value')
option6_expected_market_value = st.number_input('Select expected market value')
# Display the selected parameters
filtered_df = pd.DataFrame({
    'Game style': [option1_game_style],
    'Position': [option2_selected_position],
    'Age min': [option3_ages_min],
    'Age max': [option4_ages_max],
    'Market value': [option5_market_value],
    'Expected market value': [option6_expected_market_value]
})
st.write('Player parameters:')
st.write(filtered_df)
# NOTES ET MODIFICATIONS A APPORTER AU CODE !
#Changer les paramètres : team_1 -> option0_club_name1 ...
#Changer les paramètres : team_2 -> option1_game_style
#Changer les paramètres : position -> option2_selected_position
#num_neighbors doit être egal a 1000 pour pouvoir filter apres !
# voir le probleme avec le scaler !(minmaxscaler)
#choisir le bon dataframe (voir si noms dedans (devrait deja etre le cas))
# Define your find_closest_players function here
# Include all necessary imports and function definitions inside the function
def find_closest_players(position, team_1, team_2, num_neighbors):
    # Your function implementation...
    # For now, I'll return a placeholder DataFrame for demonstration
    # Replace this with the actual logic of your function
    def custom_scaler(dfgf_no_name):
        total_score = dfgf_no_name.sum(axis=1)
        scaled_dfgf_no_name = dfgf_no_name.div(total_score, axis=0)
        total_score = scaler.transform(total_score.values.reshape(-1, 1)).flatten()
        return total_score, scaled_dfgf_no_name
    def compare_teams(df, team1, team2):
        df1 = df.loc[team1]
        df2 = df.loc[team2]
        total_score, result = custom_scaler(pd.DataFrame(df1.subtract(df2)).T.drop(columns=['club_rating']))
        result = result.clip(lower=0)
        result['scaled_total_score'] = abs(total_score)
        name = pd.DataFrame(data=["Nino_M"], columns=['name'])
        result = name.join(result)
        #return result
        return pd.DataFrame({'Player': ['Player1', 'Player2'], 'Score': [0.8, 0.75]})
    # Specify the dimensions of the DataFrame
    rows = 4
    columns = 4
    # Create a DataFrame filled with zeros
    foots = pd.DataFrame(np.zeros((rows, columns)), columns=[f'col{i+1}' for i in range(columns)])
    new_column_names = ['either_left', 'either_right', 'left', 'right']
    foots.columns = new_column_names
    # Set the diagonal elements to 1
    np.fill_diagonal(foots.values, 1)
    # Specify the dimensions of the DataFrame
    rows = 6
    columns = 6
    # Create a DataFrame filled with zeros
    positions = pd.DataFrame(np.zeros((rows, columns)), columns=[f'col{i+1}' for i in range(columns)])
    new_column_names = ['centerback', 'fullback', 'goalkeepers', 'midfielder', 'striker', 'winger']
    positions.columns = new_column_names
    # Set the diagonal elements to 1
    np.fill_diagonal(positions.values, 1)
    final_df = positions.join(foots, how='cross')
    Nino = compare_teams(position, team_1, team_2)
    Nino = Nino.join(final_df, how='cross')
    series = pd.Series([f'Nino_M_{i}' for i in range(24)])
    Nino.name = series
    Nino = Nino[Nino.goalkeeper != 1]
    Nino.drop(columns=['goalkeeper', 'goalkeeping_abilities'], inplace=True)
    Nino.reset_index(inplace=True, drop=True)
    data = Nino
    #if player_name not in data['name'].values:
    #r    return f"Player '{player_name}' not found in the dataset."
    feature_columns = data.columns.drop('name')
    results = []
    for player_name in Nino.name:
        # Extract the specified player's statistics
        player_stats = data[data['name'] == player_name].drop(columns='name')
        player_stats['label'] = km.predict(player_stats)
        # Drop the 'label' column before fitting the model
        player_stats = player_stats.drop(columns='label')
        # Fit the NearestNeighbors model
        # Find the nearest neighbors
        distances, indices = nbrs.kneighbors(player_stats)
        # Get the names of similar players
        # Exclude the first one if it's the player themselves
        similar_players_indices = indices.flatten()
        similar_players = scaled_df_kmeans.iloc[similar_players_indices].sort_values(by='scaled_total_score',ascending=False)[['name', 'scaled_total_score']]
        #by='scaled_total_score' bPREVIOUS SORTING
        results.append(similar_players)
    final_result = pd.concat(results).sort_values(by='scaled_total_score', ascending=False)
    return final_result
# Trigger analysis based on user input
if st.checkbox('Start player analysis'):
    # Loading animation code
    with st.spinner('Performing analysis...'):
        # Call your find_closest_players function
        closest_players_result = find_closest_players(position_to_file[option2_selected_position], option0_club_name1, option0_club_name2, 5)
        # Display the results using Plotly graphs
        for index, row in closest_players_result.iterrows():
            fig = go.Figure()
            # Example Plotly graph - replace with your actual graph code
            fig.add_trace(go.Bar(x=['Player', 'Score'], y=[row['Player'], row['Score']]))
            st.plotly_chart(fig)
            # Limit to 5 graphs
            if index >= 4:
                break
    st.success('Player analysis done!')
if st.checkbox('Show Details of Selected Players'):
    if 'closest_players_result' in locals():
        st.write(closest_players_result)
    else:
        st.warning("Please run the analysis first to see details.")
# Allow users to download the results as CSV
if st.checkbox('Download Results as CSV'):
    if 'closest_players_result' in locals():
        csv = closest_players_result.to_csv(index=False)
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name='closest_players.csv',
            mime='text/csv',
        )
    else:
        st.warning("Please run the analysis first to download the data.")
# About section
st.markdown("""
## About this Tool
This tool is designed for football analysts and enthusiasts to find players with similar characteristics and performance metrics. By inputting a player's details and selecting specific criteria, the tool uses advanced data analysis techniques to identify comparable players and visualize their similarities.
""")
# Add any additional code or features here...
# End of Streamlit app