In [1]:
import pandas as pd

In [2]:
# Load the cleaned data set; the path is relative to the notebook's working directory.
df = pd.read_csv("../raw_data/clean_data.csv")

In [3]:
# Step 1: Drop rows where the club is equal to 'Other'.
# .copy() makes the filtered result an independent frame, so any later column
# assignment on it does not raise pandas' SettingWithCopyWarning.
df = df[df['club'] != 'Other'].copy()

## Grouped feature definitions

In [4]:
def create_grouped_features(df):
    """Collapse raw attribute columns into grouped feature scores.

    Most grouped features are the row-wise mean of their source columns
    (using pandas' default NaN handling for ``DataFrame.mean``). ``teamwork``
    and ``player_traits`` are taken unchanged from the single columns ``tea``
    and ``tro``.

    The input frame is left untouched: the grouped columns are built on a new
    frame instead of being assigned onto ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every source column referenced below, plus
        ``club_rating``.

    Returns
    -------
    pandas.DataFrame
        New frame on ``df``'s index holding the 14 grouped features followed
        by ``club_rating``.

    Raises
    ------
    KeyError
        If a required source column is missing from ``df``.
    """
    # Grouped feature -> raw columns that are averaged row-wise to produce it.
    averaged_groups = {
        # Offensive Skills
        'shooting': ['fin', 'lon', 'fre', 'pen'],
        'dribbling_control': ['dri', 'fir', 'fla', 'tec'],
        'passing_vision': ['pas', 'vis', 'l th', 'cro', 'cor', 'otb'],
        # Defensive Skills
        'tackling_interception': ['tck', 'mar', 'pos', 'ant'],
        'aerial_defense': ['hea', 'jum', 'aer'],
        # Physical Attributes
        'speed_agility': ['acc', 'pac', 'agi'],
        'strength_stamina': ['str', 'sta', 'bal'],
        # Mental and Tactical Attributes
        'decision_making': ['dec', 'cmp'],
        'work_ethic_effort': ['wor', 'det', 'bra'],
        # Leadership
        'leadership': ['ldr', 'com'],
        # Goalkeeping Abilities
        'goalkeeping_abilities': ['han', 'ref', 'kic', 'thr', '1v1', 'ecc', 'cmd'],
        # Behavioral Attributes
        'behavioral_attributes': ['agg', 'pun'],
    }

    # Grouped feature -> single raw column copied as-is (no averaging).
    copied_features = {
        'teamwork': 'tea',
        'player_traits': 'tro',
    }

    # Column order of the returned frame.
    output_columns = ['shooting', 'dribbling_control', 'passing_vision', 'tackling_interception',
                      'aerial_defense', 'speed_agility', 'strength_stamina', 'decision_making',
                      'work_ethic_effort', 'leadership', 'teamwork', 'goalkeeping_abilities',
                      'behavioral_attributes', 'player_traits', 'club_rating']

    new_columns = {name: df[cols].mean(axis=1) for name, cols in averaged_groups.items()}
    new_columns.update({name: df[col] for name, col in copied_features.items()})

    # assign() returns a new frame, so the caller's DataFrame gains no columns.
    return df.assign(**new_columns)[output_columns]

In [5]:
# Find the team with the highest rating for each feature
#for column in df.columns:
#    
#    best_team = df.loc[df[column].idxmax()]
#    
#    # Print the result for the best teams
#    print(f"Best team for {column}:")
#    print(best_team[[column]])
#    print("")

## team_scores for each style

In [7]:
# NOTE: this rebinds `df` to the frame returned by create_grouped_features, which
# holds only the grouped features and club_rating. Re-running this cell without
# first reloading the data fails with a KeyError on the raw attribute columns.
df = create_grouped_features(df)

In [9]:
# Inspect the grouped-feature frame.
df

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,goalkeeping_abilities,behavioral_attributes,player_traits,club_rating
0,16.25,16.25,15.833333,9.50,5.666667,14.000000,15.000000,16.5,14.000000,7.0,14.0,1.714286,6.5,3.0,2013.0
1,16.75,13.75,13.833333,12.00,10.333333,12.333333,14.666667,17.0,17.666667,8.0,18.0,2.571429,6.5,3.0,1837.0
2,14.50,15.50,13.833333,9.75,6.666667,17.333333,15.666667,14.0,14.000000,6.5,14.0,2.142857,6.0,2.0,2042.0
3,11.50,16.50,11.833333,11.25,9.000000,17.333333,14.000000,15.0,13.666667,4.5,15.0,2.142857,6.5,4.0,2042.0
4,13.25,14.50,12.333333,8.50,7.666667,15.333333,12.666667,15.0,12.000000,8.0,15.0,2.000000,5.0,1.0,1837.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,7.75,10.00,6.000000,4.00,5.333333,11.333333,9.333333,9.5,10.666667,3.5,10.0,1.714286,8.0,3.0,1156.0
174320,6.00,10.50,5.666667,3.50,4.333333,9.333333,8.000000,8.0,7.000000,2.0,8.0,2.428571,8.5,3.0,1156.0
174321,4.50,5.50,6.333333,9.25,8.666667,7.666667,11.666667,8.5,14.000000,2.0,13.0,1.714286,7.5,1.0,1156.0
174322,6.25,8.50,6.833333,6.50,3.000000,7.333333,6.000000,12.0,8.333333,8.0,16.0,2.142857,8.0,1.0,1156.0


In [8]:
def calculate_team_score(df, playing_style):
    """Score every row of ``df`` against a playing style.

    ``playing_style`` is a dict with two keys: ``'relevant_features'`` (the
    column names to use) and ``'weights'`` (one weight per feature, in the
    same order). Each selected column is min-max scaled over the rows of
    ``df``, multiplied by its weight, and the weighted columns are summed
    per row.

    Returns a tuple ``(best_team, team_scores)``: the index label of the
    highest-scoring row, and the Series of scores for all rows.
    """
    feature_names = playing_style['relevant_features']
    feature_weights = playing_style['weights']

    # Restrict to the columns this style cares about
    selected = df[feature_names]

    # Min-max scaling: per-column minimum maps to 0, maximum maps to 1
    column_min = selected.min()
    column_span = selected.max() - column_min
    scaled = (selected - column_min) / column_span

    # Weighted sum across the selected features, one score per row
    team_scores = (scaled * feature_weights).sum(axis=1)

    return team_scores.idxmax(), team_scores

# Playing-style definitions. Each style lists the grouped features it uses and
# one weight per feature, in the same order; each weight list sums to 1.0.

# Pressing-Based Style
pressing_style = {
    'relevant_features': ['decision_making', 'work_ethic_effort', 'teamwork', 'speed_agility'],
    'weights': [0.4, 0.25, 0.15, 0.2]
}

# Possession-Based Style
possession_style = {
    'relevant_features': ['passing_vision', 'decision_making', 'teamwork', 'dribbling_control', 'work_ethic_effort'],
    'weights': [0.3, 0.2, 0.1, 0.3, 0.1]
}

# Calculate the best team for the Possession-Based Style
best_team, team_scores = calculate_team_score(df, possession_style)

print(f"Best team for Possession-Based Style: {best_team}")
print("Team Scores:")
# Show all scores, highest first; the unnamed Series becomes column 0, hence by=0.
pd.DataFrame(team_scores).sort_values(by=0, axis=0, ascending=False)



Best team for Possession-Based Style: 68559
Team Scores:


Unnamed: 0,0
68559,0.901593
69356,0.860984
48,0.854903
69191,0.852954
0,0.842586
...,...
155128,0.047739
165663,0.045187
155581,0.042243
155568,0.041483


In [None]:
# Calculate the best team for the Pressing-Based Style
# (this overwrites best_team and team_scores from the possession run)
best_team, team_scores = calculate_team_score(df, pressing_style)

print(f"Best team for Pressing-Based Style: {best_team}")
print("Team Scores:")
# Show all scores, highest first; the unnamed Series becomes column 0, hence by=0.
pd.DataFrame(team_scores).sort_values(by=0, axis=0, ascending=False)

## comparator and scaler

In [None]:
def custom_scaler(dfgf_no_name):
    """Divide every row of the frame by that row's sum.

    Returns a new DataFrame of the same shape in which each value is
    expressed as a fraction of its row total.
    """
    # axis=1 sums across columns (one total per row); axis='index' then
    # matches each total to its row when dividing.
    return dfgf_no_name.div(dfgf_no_name.sum(axis=1), axis='index')

In [None]:
def compare_teams(df, team1, team2):
    """Return the row-sum-scaled difference between two rows of ``df``.

    ``team1`` and ``team2`` are index labels looked up with ``df.loc``.
    The second row is subtracted from the first, the ``club_rating`` and
    ``goalkeeping_abilities`` columns are removed, and the remaining
    differences are passed through ``custom_scaler``.

    NOTE(review): assumes each label selects exactly one row, so the
    difference is a Series - confirm the index of ``df`` is unique.
    """
    # team1 minus team2, attribute by attribute
    difference = df.loc[team1] - df.loc[team2]

    # Lay the differences out as columns, then drop the excluded attributes
    comparison = pd.DataFrame(difference).T
    comparison = comparison.drop(columns=['club_rating', 'goalkeeping_abilities'])

    return custom_scaler(comparison)

In [None]:
# NOTE(review): `test` is not defined in any of the visible cells, so this call
# fails on a fresh kernel. Presumably it is a frame indexed by club name -
# confirm and define it before this cell.
Nino = compare_teams(test, 'Man City', 'Club Brugge')

In [None]:
# Display the result of the Man City vs Club Brugge comparison.
Nino