In [1]:
import pandas as pd

In [32]:
# Load the cleaned player table from its relative path.
RAW_DATA_PATH = "../raw_data/clean_data.csv"
df = pd.read_csv(RAW_DATA_PATH)

In [33]:
df.columns

Index(['name', 'club', 'nat', 'position', 'dob', 'age', 'height', 'weight',
       'wage', 'last trans. fee', 'value', 'agg', 'jum', 'pun', 'vis', 'l th',
       'lon', 'otb', 'tck', 'tec', 'tea', 'cmp', 'fre', 'ref', 'pos', 'pen',
       'pas', 'fla', 'ant', 'cro', 'mar', 'ldr', 'cor', 'cnt', 'det', 'dec',
       'hea', 'fir', 'com', 'acc', 'pac', 'aer', 'str', 'thr', 'han', 'ecc',
       'dri', 'bal', 'kic', 'sta', 'agi', 'wor', 'bra', 'cmd', 'fin', '1v1',
       'tro', 'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
       'winger', 'division_rating', 'nat_rating', 'club_rating', 'either_left',
       'either_right', 'left', 'right'],
      dtype='object')

In [34]:
# Step 1: keep only the rows whose club is not 'Other'
is_named_club = df['club'].ne('Other')
df = df[is_named_club]

# First way: average all players of each club

In [None]:
# Step 2: Group by 'club' and take the mean of every numeric column
# (the club name becomes the index of the result).
# numeric_only=True skips the text columns (name, nat, position, dob);
# pandas >= 2.0 raises a TypeError when asked to average them.
df = df.groupby('club').mean(numeric_only=True)

In [None]:
# Columns that are excluded from the frame in the following steps.
columns_to_remove = [
    'age', 'height', 'weight',
    'wage', 'last trans. fee', 'value',
]

In [None]:
# Remove the excluded columns; rebinding `df` instead of mutating it in place.
df = df.drop(columns=columns_to_remove)

In [None]:
# Build composite features: each one is the row-wise mean of a group of
# related attribute columns.
feature_groups = {
    # Offensive skills
    'shooting': ['fin', 'lon', 'fre', 'pen'],
    'dribbling_control': ['dri', 'fir', 'fla', 'tec'],
    'passing_vision': ['pas', 'vis', 'l th', 'cro', 'cor', 'otb'],
    # Defensive skills
    'tackling_interception': ['tck', 'mar', 'pos', 'ant'],
    'aerial_defense': ['hea', 'jum', 'aer'],
    # Physical attributes
    'speed_agility': ['acc', 'pac', 'agi'],
    'strength_stamina': ['str', 'sta', 'bal'],
    # Mental and tactical attributes
    'decision_making': ['dec', 'cmp'],
    'work_ethic_effort': ['wor', 'det', 'bra'],
    # Leadership
    'leadership': ['ldr', 'com'],
    # Goalkeeping abilities
    'goalkeeping_abilities': ['han', 'ref', 'kic', 'thr', '1v1', 'ecc', 'cmd'],
    # Behavioral attributes
    'behavioral_attributes': ['agg', 'pun'],
}
for feature_name, attribute_columns in feature_groups.items():
    df[feature_name] = df[attribute_columns].mean(axis=1)

# Features backed by a single attribute are plain copies of that column.
df['teamwork'] = df['tea']
df['player_traits'] = df['tro']

# From here on `df` holds only the composite features plus the club rating.
grouped_feature_columns = [
    'shooting', 'dribbling_control', 'passing_vision',
    'tackling_interception', 'aerial_defense',
    'speed_agility', 'strength_stamina',
    'decision_making', 'work_ethic_effort',
    'leadership', 'teamwork',
    'goalkeeping_abilities', 'behavioral_attributes',
    'player_traits', 'club_rating',
]
df = df[grouped_feature_columns]

In [None]:
# Order the clubs from highest to lowest club rating.
df = df.sort_values(by="club_rating", ascending=False)

In [None]:
df.columns

In [None]:
df

In [None]:
# For every column, report the row (team) that holds the highest value.
for column in df.columns:
    top_label = df[column].idxmax()  # index label of the column maximum
    best_team = df.loc[top_label]

    # Header, the winning value (printed with the team as Series name), blank line
    print(f"Best team for {column}:", best_team[[column]], "", sep="\n")

# Second way: average a fixed number of players per position for each club

## create new dataframes for each position grouped by teams

In [51]:
def filter_position(df, club, position, max_count):
    """Return up to `max_count` rows of `df` for one club and one position flag.

    A row is kept when its 'club' value equals `club` and its `position`
    column equals 1. The first `max_count` matches are returned in the order
    they appear in `df`.
    """
    plays_for_club = df['club'] == club
    plays_position = df[position] == 1
    matches = df[plays_for_club & plays_position]
    return matches.head(max_count)

def avg_position(df, club, position, max_count):
    """Average the numeric columns of a club's first `max_count` players in a position.

    The players are selected with `filter_position` and then averaged per
    club, so the result is indexed by club and is empty when no row matches.

    numeric_only=True restricts the mean to numeric columns; without it
    pandas >= 2.0 raises a TypeError on text columns of the player table.
    """
    filtered_df = filter_position(df, club, position, max_count)
    return filtered_df.groupby('club').mean(numeric_only=True)

def filter_and_append(avg_position_df, df, club, position, max_count):
    """Append one club's positional average to `avg_position_df`.

    The average comes from `avg_position`; a new concatenated frame is
    returned and `avg_position_df` itself is not modified.
    """
    club_average = avg_position(df, club, position, max_count)
    stacked = [avg_position_df, club_average]
    return pd.concat(stacked)

# Number of players averaged per position for every club.
position_limits = {
    'goalkeeper': 2,
    'centerback': 3,
    'fullback': 4,
    'midfielder': 5,
    'winger': 4,
    'striker': 2,
}

# NOTE(review): this loop reads df['club'], so `df` must still be the
# player-level table here — confirm it was not replaced by club averages.

# Collect the per-club averages in lists and concatenate once per position:
# growing a DataFrame with pd.concat inside the loop re-copies it every time.
position_averages = {position: [] for position in position_limits}
for club in df['club'].unique():
    for position, max_count in position_limits.items():
        position_averages[position].append(avg_position(df, club, position, max_count))

# pd.concat rejects an empty list, so fall back to an empty frame in that case.
stacked_averages = {
    position: pd.concat(frames) if frames else pd.DataFrame()
    for position, frames in position_averages.items()
}

goalkeepers = stacked_averages['goalkeeper']
centerbacks = stacked_averages['centerback']
fullbacks = stacked_averages['fullback']
midfielders = stacked_averages['midfielder']
wingers = stacked_averages['winger']
strikers = stacked_averages['striker']

In [52]:
goalkeepers

Unnamed: 0_level_0,age,height,weight,wage,last trans. fee,value,agg,jum,pun,vis,...,midfielder,striker,winger,division_rating,nat_rating,club_rating,either_left,either_right,left,right
club,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Man City,25.5,189.5,86.0,355500.0,23125000.0,35000000.0,9.5,14.0,9.5,14.0,...,0.0,0.0,0.0,93.8,1744.045,2013.0,0.0,0.0,0.5,0.5
Tottenham,33.0,192.0,81.5,308000.0,6250000.0,16625000.0,12.5,15.0,14.0,8.5,...,0.0,0.0,0.0,93.8,1830.495,1837.0,0.0,0.0,0.5,0.5
Liverpool,30.0,190.5,85.5,415000.0,35000000.0,32750000.0,12.0,15.0,15.0,11.5,...,0.0,0.0,0.0,93.8,1769.085,2042.0,0.0,0.0,0.0,1.0
Man Utd,26.0,190.0,80.5,1114500.0,9500000.0,26750000.0,8.0,14.5,12.0,6.5,...,0.0,0.0,0.0,93.8,1766.925,1717.0,0.0,0.0,0.0,1.0
Aston Villa,30.5,190.5,85.5,225000.0,13875000.0,15750000.0,7.0,14.5,7.5,9.0,...,0.0,0.0,0.0,93.8,1834.585,1652.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Daytona Rush,18.5,185.5,77.5,9.0,0.0,110.0,10.0,12.0,10.5,8.5,...,0.0,0.0,0.0,40.8,1675.890,1156.0,0.0,0.0,0.0,1.0
Eastside FC,19.0,189.0,78.0,9.0,0.0,110.0,12.0,10.5,10.0,7.0,...,0.0,0.0,0.0,40.8,1621.595,1156.0,0.0,0.0,0.0,1.0
OVF Alliance,19.5,188.5,81.0,9.0,0.0,110.0,10.5,10.0,9.0,8.5,...,0.0,0.0,0.0,40.8,1675.890,1156.0,0.0,0.0,0.0,1.0
Austin FC Academy,16.0,188.5,72.5,9.0,0.0,110.0,8.5,13.0,10.5,10.0,...,0.0,0.0,0.0,45.9,1675.890,1156.0,0.0,0.0,0.5,0.5


In [54]:
midfielders

Unnamed: 0_level_0,age,height,weight,wage,last trans. fee,value,agg,jum,pun,vis,...,midfielder,striker,winger,division_rating,nat_rating,club_rating,either_left,either_right,left,right
club,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Man City,25.4,179.20,70.20,644800.0,40200000.0,65100000.0,11.2,9.80,2.20,16.60,...,1.0,0.0,0.0,93.8,1742.176,2013.0,0.0,0.2,0.4,0.4
Tottenham,24.6,182.20,77.80,474000.0,27900000.0,32800000.0,14.0,10.40,1.60,14.40,...,1.0,0.0,0.0,93.8,1799.144,1837.0,0.0,0.0,0.4,0.6
Liverpool,27.8,176.00,72.00,630600.0,33300000.0,48500000.0,13.2,10.40,2.40,14.60,...,1.0,0.0,0.0,93.8,1675.576,2042.0,0.0,0.0,0.0,1.0
Man Utd,26.6,183.40,73.80,692200.0,54100000.0,43750000.0,13.8,11.80,2.40,14.80,...,1.0,0.0,0.0,93.8,1733.960,1717.0,0.0,0.0,0.4,0.6
Aston Villa,24.4,178.60,70.20,317800.0,9760000.0,28950000.0,13.6,8.40,2.20,14.40,...,1.0,0.0,0.0,93.8,1546.502,1652.0,0.0,0.0,0.4,0.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Daytona Rush,18.6,179.40,73.20,9.0,0.0,110.0,10.2,7.60,2.00,9.60,...,1.0,0.0,0.0,40.8,1675.890,1156.0,0.0,0.0,0.0,1.0
Eastside FC,19.0,174.25,70.50,9.0,0.0,110.0,12.0,6.75,2.25,9.00,...,1.0,0.0,0.0,40.8,1675.890,1156.0,0.0,0.0,0.0,1.0
OVF Alliance,19.8,180.20,73.20,9.0,0.0,110.0,10.6,10.00,2.20,8.60,...,1.0,0.0,0.0,40.8,1629.304,1156.0,0.0,0.0,0.0,1.0
Austin FC Academy,17.0,174.75,64.75,9.0,0.0,110.0,9.5,8.75,2.00,8.75,...,1.0,0.0,0.0,45.9,1675.890,1156.0,0.0,0.0,0.5,0.5


## df grouped by teams

In [29]:
def filter_and_append(avg_clubs, df, club, position, max_count):
    """Stack a club's first `max_count` players of `position` under `avg_clubs`.

    The raw player rows returned by `filter_position` are appended (not
    their average); a new concatenated frame is returned.
    """
    club_players = filter_position(df, club, position, max_count)
    stacked = [avg_clubs, club_players]
    return pd.concat(stacked)

# Squad template: (position flag, number of players taken), in stacking order.
squad_template = [
    ('goalkeeper', 2),
    ('centerback', 3),
    ('fullback', 4),
    ('winger', 4),
    ('midfielder', 5),
    ('striker', 2),
]

# Gather every club's selected players first and concatenate once;
# concatenating inside the loop re-copies the growing frame on every step.
selected_players = [
    filter_position(df, club, position, max_count)
    for club in df['club'].unique()
    for position, max_count in squad_template
]
# pd.concat rejects an empty list, so fall back to an empty frame.
avg_clubs = pd.concat(selected_players) if selected_players else pd.DataFrame()

# Step 2: Group by 'club' and take the mean of the numeric columns.
# numeric_only=True skips the text columns (name, nat, position, dob);
# pandas >= 2.0 raises a TypeError when asked to average them.
df = avg_clubs.groupby('club').mean(numeric_only=True)

# Columns that are excluded from the frame in the following steps.
columns_to_remove = ['age', 'height', 'weight', "wage", "last trans. fee", "value"]

df = df.drop(columns=columns_to_remove)

Unnamed: 0,name,club,nat,position,dob,age,height,weight,wage,last trans. fee,...,midfielder,striker,winger,division_rating,nat_rating,club_rating,either_left,either_right,left,right
24,Ederson,Man City,BRA,GK,17/8/1993 (26 years old),26.0,188.0,86.0,474000.0,38500000.0,...,0,0,0,93.8,1812.20,2013.0,0,0,1,0
246,Zack Steffen,Man City,USA,GK,2/4/1995 (25 years old),25.0,191.0,86.0,237000.0,7750000.0,...,0,0,0,93.8,1675.89,2013.0,0,0,0,1
10,Aymeric Laporte,Man City,FRA,D (C),27/5/1994 (26 years old),26.0,191.0,85.0,569000.0,63000000.0,...,0,0,0,93.8,1853.11,2013.0,0,0,1,0
19,Rúben Dias,Man City,POR,D (C),14/5/1997 (23 years old),23.0,187.0,83.0,332000.0,68000000.0,...,0,0,0,93.8,1739.83,2013.0,0,0,0,1
45,John Stones,Man City,ENG,D (C),28/5/1994 (26 years old),26.0,188.0,72.0,474000.0,52000000.0,...,0,0,0,93.8,1807.88,2013.0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174316,Marcos Leal,Charlotte FC Academy,USA,AM (RLC),8/12/2004 (15 years old),15.0,166.0,55.0,9.0,0.0,...,1,0,0,40.8,1675.89,1156.0,0,0,1,0
174317,Nathan Palmer,Charlotte FC Academy,USA,AM (C),11/2/2004 (16 years old),16.0,178.0,67.0,9.0,0.0,...,1,0,0,40.8,1675.89,1156.0,0,0,0,1
174322,Yeferson Suárez,Charlotte FC Academy,USA,M (C),4/1/2004 (16 years old),16.0,162.0,55.0,9.0,0.0,...,1,0,0,40.8,1675.89,1156.0,0,0,0,1
174318,Darren Cox,Charlotte FC Academy,USA,ST (C),15/12/2003 (16 years old),16.0,174.0,62.0,9.0,0.0,...,0,1,0,40.8,1675.89,1156.0,0,0,0,1


## grouped features

### definition

In [55]:
def create_grouped_features(df):
    """Build the composite feature frame from raw attribute columns.

    Each composite feature is the row-wise mean of a group of attribute
    columns; 'teamwork' and 'player_traits' are copies of 'tea' and 'tro'.
    The input frame is not modified: the new columns are added to a copy
    (earlier versions wrote them into the caller's frame as a side effect).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every attribute column listed below plus 'club_rating'.

    Returns
    -------
    pandas.DataFrame
        New frame with the 14 composite features followed by 'club_rating',
        on the same index as `df`.

    Raises
    ------
    KeyError
        If a required column is missing from `df`.
    """
    averaged_groups = {
        # Offensive skills
        'shooting': ['fin', 'lon', 'fre', 'pen'],
        'dribbling_control': ['dri', 'fir', 'fla', 'tec'],
        'passing_vision': ['pas', 'vis', 'l th', 'cro', 'cor', 'otb'],
        # Defensive skills
        'tackling_interception': ['tck', 'mar', 'pos', 'ant'],
        'aerial_defense': ['hea', 'jum', 'aer'],
        # Physical attributes
        'speed_agility': ['acc', 'pac', 'agi'],
        'strength_stamina': ['str', 'sta', 'bal'],
        # Mental and tactical attributes
        'decision_making': ['dec', 'cmp'],
        'work_ethic_effort': ['wor', 'det', 'bra'],
        # Leadership
        'leadership': ['ldr', 'com'],
        # Goalkeeping abilities
        'goalkeeping_abilities': ['han', 'ref', 'kic', 'thr', '1v1', 'ecc', 'cmd'],
        # Behavioral attributes
        'behavioral_attributes': ['agg', 'pun'],
    }
    # Features backed by a single attribute are copied unchanged.
    copied_columns = {'teamwork': 'tea', 'player_traits': 'tro'}

    output_columns = [
        'shooting', 'dribbling_control', 'passing_vision', 'tackling_interception',
        'aerial_defense', 'speed_agility', 'strength_stamina', 'decision_making',
        'work_ethic_effort', 'leadership', 'teamwork', 'goalkeeping_abilities',
        'behavioral_attributes', 'player_traits', 'club_rating',
    ]

    new_columns = {
        name: df[columns].mean(axis=1) for name, columns in averaged_groups.items()
    }
    new_columns.update({name: df[source] for name, source in copied_columns.items()})

    # assign() works on a copy, so the caller's frame keeps its original columns.
    dfgf = df.assign(**new_columns)[output_columns]

    return dfgf

In [56]:
# Composite features computed from the per-club midfielder averages.
test = create_grouped_features(midfielders)

In [60]:
test.shooting.max()

13.45

In [61]:
test.shooting.min()

1.375

In [8]:
# Replace `df` by its composite-feature view (14 grouped features plus
# 'club_rating'). This reuses create_grouped_features, defined earlier in the
# notebook, instead of repeating the same column-by-column code here.
df = create_grouped_features(df)

In [9]:
# For every column, report the row (team) that holds the highest value.
for column in df.columns:
    top_label = df[column].idxmax()  # index label of the column maximum
    best_team = df.loc[top_label]

    # Header, the winning value (printed with the team as Series name), blank line
    print(f"Best team for {column}:", best_team[[column]], "", sep="\n")

Best team for shooting:
shooting    10.9375
Name: Tottenham, dtype: float64

Best team for dribbling_control:
dribbling_control    14.3125
Name: Barcelona, dtype: float64

Best team for passing_vision:
passing_vision    11.666667
Name: Barcelona, dtype: float64

Best team for tackling_interception:
tackling_interception    12.3125
Name: Liverpool, dtype: float64

Best team for aerial_defense:
aerial_defense    8.866667
Name: Burnley, dtype: float64

Best team for speed_agility:
speed_agility    14.733333
Name: Man City, dtype: float64

Best team for strength_stamina:
strength_stamina    13.916667
Name: Man Utd, dtype: float64

Best team for decision_making:
decision_making    14.225
Name: Barcelona, dtype: float64

Best team for work_ethic_effort:
work_ethic_effort    14.666667
Name: Lazio, dtype: float64

Best team for leadership:
leadership    7.45
Name: Milan, dtype: float64

Best team for teamwork:
teamwork    14.9
Name: Arsenal, dtype: float64

Best team for goalkeeping_abilities:

In [10]:
# Order the clubs from highest to lowest club rating.
df = df.sort_values(by="club_rating", ascending=False)

In [11]:
df

Unnamed: 0_level_0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits,club_rating
club,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Liverpool,9.437500,13.975000,11.533333,12.312500,8.366667,14.366667,13.850000,14.175000,14.616667,7.175000,14.500000,3.342857,8.425000,3.250000,2042.0
Man City,10.500000,13.937500,11.558333,10.750000,8.250000,14.733333,13.666667,13.825000,13.850000,6.550000,13.900000,3.392857,7.525000,3.300000,2013.0
R. Madrid,10.187500,13.512500,11.083333,10.975000,8.333333,13.933333,13.016667,12.775000,13.766667,6.800000,12.900000,3.200000,7.125000,2.850000,2010.0
FC Bayern,9.937500,12.037500,10.600000,10.400000,8.016667,13.783333,12.966667,14.000000,13.616667,6.450000,13.650000,3.278571,7.200000,3.900000,1955.0
Inter,10.013889,12.069444,10.250000,11.013889,8.444444,13.500000,13.481481,12.944444,14.296296,7.333333,14.611111,3.277778,7.861111,3.500000,1917.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Goianésia,6.263158,8.618421,7.219298,9.013158,6.789474,11.807018,9.280702,9.815789,10.228070,5.921053,8.631579,2.932331,6.815789,2.947368,1156.0
Gold Coast Knights,5.928571,8.321429,6.761905,8.214286,5.428571,12.142857,7.880952,9.821429,9.857143,6.000000,8.285714,3.193878,6.285714,2.857143,1156.0
Gold Coast Utd (NPL),4.173077,6.807692,5.794872,8.096154,6.538462,11.051282,7.564103,9.076923,8.512821,5.346154,7.153846,2.582418,5.423077,2.615385,1156.0
Goldau,4.328947,7.447368,5.649123,7.157895,5.350877,12.368421,7.263158,8.842105,8.877193,5.052632,7.000000,2.969925,6.710526,3.368421,1156.0


In [14]:
def calculate_team_score(df, playing_style):
    """Score every row of `df` against a playing style.

    `playing_style` is a dict with 'relevant_features' (column names) and
    'weights' (one weight per feature, in the same order). Each relevant
    column is min-max scaled to the 0..1 range, multiplied by its weight and
    summed per row.

    Returns a tuple (index label of the highest score, Series of all scores).
    """
    features = df[playing_style['relevant_features']]

    # Min-max scaling, column by column
    lowest = features.min()
    value_range = features.max() - lowest
    scaled = (features - lowest) / value_range

    # Weighted sum across the relevant features of each row
    team_scores = (scaled * playing_style['weights']).sum(axis=1)

    return team_scores.idxmax(), team_scores

# Playing-style definitions: the composite features that matter for the
# style and the weight given to each of them (same order).

# Pressing-based style
pressing_style = dict(
    relevant_features=['decision_making', 'work_ethic_effort', 'teamwork', 'speed_agility'],
    weights=[0.4, 0.25, 0.15, 0.2],
)

# Possession-based style (every feature weighted equally)
possession_style = dict(
    relevant_features=['passing_vision', 'decision_making', 'teamwork', 'dribbling_control', 'work_ethic_effort'],
    weights=[0.2] * 5,
)

# Score every team against the possession-based style
best_team, team_scores = calculate_team_score(df, possession_style)

print(f"Best team for Possession-Based Style: {best_team}", "Team Scores:", sep="\n")
# Scores as a one-column frame (column label 0), best team first
pd.DataFrame(team_scores).sort_values(by=0, ascending=False)



Best team for Possession-Based Style: Liverpool
Team Scores:


Unnamed: 0_level_0,0
club,Unnamed: 1_level_1
Liverpool,0.982066
Barcelona,0.953833
Man City,0.947978
A. Madrid,0.947358
Tottenham,0.933830
...,...
Tabuan U18s,0.059826
BSRC,0.054816
Setia Perdana,0.053720
Tunas,0.035930


In [15]:
# Score every team against the pressing-based style
best_team, team_scores = calculate_team_score(df, pressing_style)

print(f"Best team for Pressing-Based Style: {best_team}", "Team Scores:", sep="\n")
# Scores as a one-column frame (column label 0), best team first
pd.DataFrame(team_scores).sort_values(by=0, ascending=False)

Best team for Pressing-Based Style: Liverpool
Team Scores:


Unnamed: 0_level_0,0
club,Unnamed: 1_level_1
Liverpool,0.981821
Man City,0.948671
A. Madrid,0.942543
Barcelona,0.935466
Paris SG,0.932657
...,...
BSRC,0.148985
Setia Perdana,0.143505
Tabuan U18s,0.125009
Tunas,0.119441


In [48]:
def compare_teams(df, team1, team2):
    """Return the column-wise difference `team1 - team2` as a one-row frame.

    Both teams are looked up by index label in `df`.
    """
    difference = df.loc[team1] - df.loc[team2]
    return pd.DataFrame(difference).transpose()

In [62]:
# Feature-by-feature gap between the two clubs in `test` (Man City minus Club Brugge).
Nino = compare_teams(test, 'Man City', 'Club Brugge')

In [63]:
def custom_scaler(dfgf_no_name):
    """Divide every value by the sum of its row.

    Returns a new frame of the same shape in which each row's entries are
    expressed as a share of that row's total.
    """
    row_totals = dfgf_no_name.sum(axis='columns')
    return dfgf_no_name.div(row_totals, axis='index')

In [65]:
Nino

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits,club_rating
0,2.5,3.5,2.1,1.7,-0.8,3.266667,2.4,2.7,0.8,-0.6,0.6,0.142857,-1.5,0.4,356.0


In [67]:
custom_scaler(Nino.drop(columns=['club_rating', 'goalkeeping_abilities']))

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,behavioral_attributes,player_traits
0,0.146484,0.205078,0.123047,0.099609,-0.046875,0.191406,0.140625,0.158203,0.046875,-0.035156,0.035156,-0.087891,0.023438


# Third way: weighted sum of the raw attributes, averaged over all players of each club

In [None]:
# doing everything before scaling

In [None]:
# Step 2: Group by 'club' and take the mean of every numeric column.
# numeric_only=True keeps the mean from failing on text columns
# (pandas >= 2.0 raises a TypeError for them).
df = df.groupby('club').mean(numeric_only=True)

In [None]:
# Columns that are excluded from the frame in the following steps.
columns_to_remove = [
    'age', 'height', 'weight',
    'wage', 'last trans. fee', 'value',
]

In [None]:
# Remove the excluded columns; rebinding `df` instead of mutating it in place.
df = df.drop(columns=columns_to_remove)

In [None]:
# Function to calculate the team score based on features and weights
def calculate_team_score(row, weights):
    """Return the weighted sum of the entries of `row` that have a weight.

    Parameters
    ----------
    row : pandas.Series
        One team's values, indexed by feature name.
    weights : dict
        Maps feature name to weight; features missing from it are ignored.

    Only weighted features enter the sum. The previous form iterated over
    `row.index[:-1]`, which silently dropped the last column even when it
    carried a weight, and multiplied unweighted columns by 0, so a NaN in
    any unweighted column turned the whole score into NaN.
    """
    return sum(row[feature] * weights[feature] for feature in row.index if feature in weights)

# Function to find the best fitting team for a given style
def find_best_fitting_team(teams_df, style_weights):
    """Score every row of `teams_df` and return the index label of the best one.

    The scores are written into `teams_df` as a 'score' column (the frame is
    modified in place) using `calculate_team_score` with `style_weights`.
    """
    def _score(team_row):
        return calculate_team_score(team_row, style_weights)

    teams_df['score'] = teams_df.apply(_score, axis=1)
    return teams_df['score'].idxmax()

# Convert teams data to a DataFrame
# NOTE(review): neither `create_teams_dataframe` nor `teams_data` is defined
# in the visible notebook — confirm where they come from, otherwise this line
# raises a NameError on a fresh kernel.
teams_df = create_teams_dataframe(teams_data)

# Example style weights for a high pressing style
# (one weight per raw attribute column, keyed by the column name)
high_pressing_weights = {
    'agg': 0.2, 'jum': 0.15, 'pun': 0.1, 'vis': 0.2, 'l th': 0.15, 'lon': 0.1, 'otb': 0.1, 'tck': 0.2, 'tec': 0.2, 'tea': 0.2,
    'cmp': 0.2, 'fre': 0.15, 'ref': 0.1, 'pos': 0.15, 'pen': 0.1, 'pas': 0.2, 'fla': 0.15, 'ant': 0.15, 'cro': 0.2, 'mar': 0.2,
    'ldr': 0.15, 'cor': 0.1, 'cnt': 0.2, 'det': 0.2, 'dec': 0.15, 'hea': 0.2, 'fir': 0.2, 'com': 0.2, 'acc': 0.25, 'pac': 0.2,
    'aer': 0.2, 'str': 0.2, 'thr': 0.15, 'han': 0.2, 'ecc': 0.15, 'dri': 0.2, 'bal': 0.15, 'kic': 0.1, 'sta': 0.2, 'agi': 0.2,
    'wor': 0.15, 'bra': 0.2, 'cmd': 0.2, 'fin': 0.2, '1v1': 0.15, 'tro': 0.2,
}

# The scoring runs on `df` (not on `teams_df`) and adds a 'score' column to it.
best_fit_team = find_best_fitting_team(df, high_pressing_weights)
print(f"The team that best fits the high pressing style is: {best_fit_team}")


In [None]:
df[df.index == 'Eibar']

In [None]:
df.sort_values(by='score', ascending=False)

In [None]:
# Example weights for a possession style: one weight per raw attribute
# column, keyed by the column name.
possession_weights = {
    'agg': 0.15, 'jum': 0.1, 'pun': 0.1, 'vis': 0.2, 'l th': 0.2,
    'lon': 0.2, 'otb': 0.2, 'tck': 0.1, 'tec': 0.25, 'tea': 0.25,
    'cmp': 0.25, 'fre': 0.2, 'ref': 0.2, 'pos': 0.3, 'pen': 0.25,
    'pas': 0.3, 'fla': 0.25, 'ant': 0.2, 'cro': 0.25, 'mar': 0.15,
    'ldr': 0.2, 'cor': 0.2, 'cnt': 0.1, 'det': 0.1, 'dec': 0.25,
    'hea': 0.1, 'fir': 0.1, 'com': 0.25, 'acc': 0.15, 'pac': 0.15,
    'aer': 0.1, 'str': 0.15, 'thr': 0.2, 'han': 0.15, 'ecc': 0.15,
    'dri': 0.3, 'bal': 0.3, 'kic': 0.2, 'sta': 0.2, 'agi': 0.3,
    'wor': 0.15, 'bra': 0.25, 'cmd': 0.3, 'fin': 0.15, '1v1': 0.2,
    'tro': 0.2,
}


In [None]:
# NOTE(review): this scores `teams_df`, whose construction is not visible in
# this notebook, while the pressing run scored `df` — confirm which frame is intended.
best_fit_team_possession = find_best_fitting_team(teams_df, possession_weights)
print(f"The team that best fits the possession style is: {best_fit_team_possession}")

In [None]:
teams_df

In [None]:
df.head(2)

# Fourth way: weighted sum of the raw attributes, averaged over a fixed squad per club

In [None]:
def filter_position(df, club, position, max_count):
    """Return up to `max_count` rows of `df` for one club and one position flag.

    A row is kept when its 'club' value equals `club` and its `position`
    column equals 1. The first `max_count` matches are returned in the order
    they appear in `df`.
    """
    plays_for_club = df['club'] == club
    plays_position = df[position] == 1
    matches = df[plays_for_club & plays_position]
    return matches.head(max_count)

def filter_and_append(avg_clubs, df, club, position, max_count):
    """Stack a club's first `max_count` players of `position` under `avg_clubs`.

    The raw player rows returned by `filter_position` are appended; a new
    concatenated frame is returned.
    """
    club_players = filter_position(df, club, position, max_count)
    stacked = [avg_clubs, club_players]
    return pd.concat(stacked)

# Squad template: (position flag, number of players taken), in stacking order.
squad_template = [
    ('goalkeeper', 2),
    ('centerback', 3),
    ('fullback', 4),
    ('winger', 4),
    ('midfielder', 5),
    ('striker', 2),
]

# Gather every club's selected players first and concatenate once;
# concatenating inside the loop re-copies the growing frame on every step.
selected_players = [
    filter_position(df, club, position, max_count)
    for club in df['club'].unique()
    for position, max_count in squad_template
]
# pd.concat rejects an empty list, so fall back to an empty frame.
avg_clubs = pd.concat(selected_players) if selected_players else pd.DataFrame()

avg_clubs

In [None]:
# Step 2: Group by 'club' and take the mean of the numeric columns.
# numeric_only=True skips the text columns of the player rows;
# pandas >= 2.0 raises a TypeError when asked to average them.
df = avg_clubs.groupby('club').mean(numeric_only=True)

# Columns that are excluded from the frame in the following steps.
columns_to_remove = ['age', 'height', 'weight', "wage", "last trans. fee", "value"]

df = df.drop(columns=columns_to_remove)

In [None]:
# Function to calculate the team score based on features and weights
def calculate_team_score(row, weights):
    """Return the weighted sum of the entries of `row` that have a weight.

    Parameters
    ----------
    row : pandas.Series
        One team's values, indexed by feature name.
    weights : dict
        Maps feature name to weight; features missing from it are ignored.

    Only weighted features enter the sum. The previous form iterated over
    `row.index[:-1]`, which silently dropped the last column even when it
    carried a weight, and multiplied unweighted columns by 0, so a NaN in
    any unweighted column turned the whole score into NaN.
    """
    return sum(row[feature] * weights[feature] for feature in row.index if feature in weights)

# Function to find the best fitting team for a given style
def find_best_fitting_team(teams_df, style_weights, style_score):
    """Score every row of `teams_df` and return the index label of the best one.

    The scores are written into `teams_df` under the column name given by
    `style_score` (the frame is modified in place), using
    `calculate_team_score` with `style_weights`.
    """
    def _score(team_row):
        return calculate_team_score(team_row, style_weights)

    teams_df[style_score] = teams_df.apply(_score, axis=1)
    return teams_df[style_score].idxmax()

# Example weights for a high pressing style covering every raw attribute
# (defined here but not used by the calls below).
high_pressing_weights = {
    'agg': 0.2, 'jum': 0.15, 'pun': 0.1, 'vis': 0.2, 'l th': 0.15,
    'lon': 0.1, 'otb': 0.1, 'tck': 0.2, 'tec': 0.2, 'tea': 0.2,
    'cmp': 0.2, 'fre': 0.15, 'ref': 0.1, 'pos': 0.15, 'pen': 0.1,
    'pas': 0.2, 'fla': 0.15, 'ant': 0.15, 'cro': 0.2, 'mar': 0.2,
    'ldr': 0.15, 'cor': 0.1, 'cnt': 0.2, 'det': 0.2, 'dec': 0.15,
    'hea': 0.2, 'fir': 0.2, 'com': 0.2, 'acc': 0.25, 'pac': 0.2,
    'aer': 0.2, 'str': 0.2, 'thr': 0.15, 'han': 0.2, 'ecc': 0.15,
    'dri': 0.2, 'bal': 0.15, 'kic': 0.1, 'sta': 0.2, 'agi': 0.2,
    'wor': 0.15, 'bra': 0.2, 'cmd': 0.2, 'fin': 0.2, '1v1': 0.15,
    'tro': 0.2,
}

# High pressing weights restricted to a subset of the attributes
better_high_pressing_weights = {
    'agg': 0.2, 'vis': 0.2, 'tck': 0.2, 'cmp': 0.2,
    'fre': 0.15, 'pos': 0.15, 'pas': 0.2, 'ant': 0.15,
    'cor': 0.1, 'cnt': 0.2, 'det': 0.2, 'dec': 0.15,
    'acc': 0.25, 'pac': 0.2, 'str': 0.2, 'sta': 0.2,
    'agi': 0.2,
}

# Pressing run: the scores end up in df['press_score']
best_fit_team = find_best_fitting_team(
    df, better_high_pressing_weights, style_score='press_score'
)
print(f"The team that best fits the high pressing style is: {best_fit_team}")

# Possession run: the scores end up in df['poss_score'].
# `possession_weights` comes from another cell of this notebook.
best_fit_team = find_best_fitting_team(
    df, possession_weights, style_score='poss_score'
)
print(f"The team that best fits the possesion style is: {best_fit_team}")

In [None]:
df.sort_values(by='press_score', ascending=False).head(10)

In [None]:
df.sort_values(by='poss_score', ascending=False).head(10)

In [None]:
df[df.index == 'Club Brugge']

In [None]:
# Function to find the best fitting team for a given style
def find_best_fitting_team(teams_df, style_weights):
    """Score every row of `teams_df` and return the index label of the best one.

    The scores are written into `teams_df` as a 'style_score' column (the
    frame is modified in place, and the column is overwritten on every call),
    using `calculate_team_score` with `style_weights`.
    """
    def _score(team_row):
        return calculate_team_score(team_row, style_weights)

    teams_df['style_score'] = teams_df.apply(_score, axis=1)
    return teams_df['style_score'].idxmax()

# Example weights per playing style: each dict maps a raw attribute column
# to the weight it gets in that style's score.

# Possession style
possession_weights = {
    'agg': 0.15, 'jum': 0.1, 'pun': 0.1, 'vis': 0.2, 'l th': 0.2,
    'lon': 0.2, 'otb': 0.2, 'tck': 0.1, 'tec': 0.25, 'tea': 0.25,
    'cmp': 0.25, 'fre': 0.2, 'ref': 0.2, 'pos': 0.3, 'pen': 0.25,
    'pas': 0.3, 'fla': 0.25, 'ant': 0.2, 'cro': 0.25, 'mar': 0.15,
    'ldr': 0.2, 'cor': 0.2, 'cnt': 0.1, 'det': 0.1, 'dec': 0.25,
    'hea': 0.1, 'fir': 0.1, 'com': 0.25, 'acc': 0.15, 'pac': 0.15,
    'aer': 0.1, 'str': 0.15, 'thr': 0.2, 'han': 0.15, 'ecc': 0.15,
    'dri': 0.3, 'bal': 0.3, 'kic': 0.2, 'sta': 0.2, 'agi': 0.3,
    'wor': 0.15, 'bra': 0.25, 'cmd': 0.3, 'fin': 0.15, '1v1': 0.2,
    'tro': 0.2,
}

# Defensive style
defensive_weights = {
    'tck': 0.25, 'det': 0.25, 'pos': 0.2, 'ref': 0.15,
    'cmp': 0.2, 'fre': 0.15, 'hea': 0.25, 'com': 0.3,
    'acc': 0.1, 'pac': 0.15, 'aer': 0.25, 'str': 0.3,
    'sta': 0.3,
}

# Direct style
direct_weights = {
    'lon': 0.2, 'hea': 0.2, 'thr': 0.25, 'com': 0.2,
    'acc': 0.25, 'pac': 0.2, 'str': 0.3, 'sta': 0.25,
}

# Counter-attacking style: the direct-style weights plus six more attributes
counter_attacking_weights = {
    **direct_weights,
    'det': 0.2, 'dec': 0.25, 'pas': 0.2,
    'fla': 0.15, 'ant': 0.2, 'mar': 0.25,
}

# Find the best fitting team for each style. Every call rewrites
# df['style_score'], so the column ends up holding the last style's scores.
best_fit_team_possession = find_best_fitting_team(teams_df=df, style_weights=possession_weights)
best_fit_team_defensive = find_best_fitting_team(teams_df=df, style_weights=defensive_weights)
best_fit_team_direct = find_best_fitting_team(teams_df=df, style_weights=direct_weights)
best_fit_team_counter_attacking = find_best_fitting_team(teams_df=df, style_weights=counter_attacking_weights)

# Print the results
print(f"The team that best fits the possession style is: {best_fit_team_possession}")
print(f"The team that best fits the defensive style is: {best_fit_team_defensive}")
print(f"The team that best fits the direct style is: {best_fit_team_direct}")
print(f"The team that best fits the counter-attacking style is: {best_fit_team_counter_attacking}")
