In [1]:
import pandas as pd

In [2]:
# Load the cleaned dataset; the path is relative to the notebook's working directory.
clean_data_path = "../raw_data/clean_data.csv"
df = pd.read_csv(clean_data_path)

In [3]:
# Display the column index to see which attributes and flag columns are available.
df.columns

Index(['name', 'club', 'nat', 'position', 'dob', 'age', 'height', 'weight',
       'wage', 'last trans. fee', 'value', 'agg', 'jum', 'pun', 'vis', 'l th',
       'lon', 'otb', 'tck', 'tec', 'tea', 'cmp', 'fre', 'ref', 'pos', 'pen',
       'pas', 'fla', 'ant', 'cro', 'mar', 'ldr', 'cor', 'cnt', 'det', 'dec',
       'hea', 'fir', 'com', 'acc', 'pac', 'aer', 'str', 'thr', 'han', 'ecc',
       'dri', 'bal', 'kic', 'sta', 'agi', 'wor', 'bra', 'cmd', 'fin', '1v1',
       'tro', 'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
       'winger', 'division_rating', 'nat_rating', 'club_rating', 'either_left',
       'either_right', 'left', 'right'],
      dtype='object')

In [4]:
# Step 1: keep only the rows whose club is something other than 'Other'
has_named_club = df['club'] != 'Other'
df = df[has_named_club]

# Create a fictional player

## Create a new DataFrame for each position, grouped by team

In [5]:
def filter_position(df, club, position, max_count):
    """Return the first ``max_count`` rows of ``df`` for one club and position.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``club`` column and a flag column named ``position``.
    club : str
        Value to match in the ``club`` column.
    position : str
        Name of the flag column; a row is kept when that column equals 1.
    max_count : int
        Maximum number of matching rows to return, taken in the frame's
        existing row order (no sorting is applied here).

    Returns
    -------
    pandas.DataFrame
        At most ``max_count`` matching rows, with their original index labels.
    """
    in_club = df['club'] == club
    has_position = df[position] == 1
    matches = df[in_club & has_position]
    return matches.head(max_count)

def avg_position(df, club, position, max_count):
    """Average the numeric columns of a club's selected players at one position.

    The players are chosen by ``filter_position`` (at most ``max_count`` rows
    for the given club and position flag) and then averaged per club.

    Parameters
    ----------
    df : pandas.DataFrame
        Player frame containing a ``club`` column and the ``position`` flag column.
    club : str
        Club whose players are averaged.
    position : str
        Name of the position flag column passed on to ``filter_position``.
    max_count : int
        Maximum number of players included in the average.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``club`` holding the column means, or an empty
        frame when no player matches.
    """
    selected = filter_position(df, club, position, max_count)
    # numeric_only=True restricts the mean to numeric columns. The frame
    # presumably still carries text columns (e.g. 'name', 'nat', 'position',
    # 'dob' -- TODO confirm dtypes); without this flag pandas >= 2.0 raises
    # TypeError when asked to average them, while older versions dropped them
    # with a deprecation warning.
    return selected.groupby('club').mean(numeric_only=True)

def filter_and_append(avg_position_df, df, club, position, max_count):
    """Append one club's position average to an accumulator frame.

    Parameters
    ----------
    avg_position_df : pandas.DataFrame
        Accumulator holding the averages collected so far (may be empty).
    df, club, position, max_count
        Forwarded unchanged to ``avg_position``.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``avg_position_df`` followed by the rows returned by
        ``avg_position`` for this club. The accumulator itself is not modified.
    """
    club_average = avg_position(df, club, position, max_count)
    stacked = pd.concat([avg_position_df, club_average])
    return stacked

# Position flag column -> maximum number of players averaged per club.
squad_slots = {
    'goalkeeper': 2,
    'centerback': 3,
    'fullback': 4,
    'midfielder': 5,
    'winger': 4,
    'striker': 2,
}

# One accumulator per position, each starting as an empty frame.
position_averages = {slot: pd.DataFrame() for slot in squad_slots}

# For every club, append that club's per-position average to the matching accumulator.
for club in df['club'].unique():
    for slot, slot_size in squad_slots.items():
        position_averages[slot] = filter_and_append(
            position_averages[slot], df, club, slot, slot_size
        )

# Expose the accumulated frames under the names used by the rest of the notebook.
goalkeepers = position_averages['goalkeeper']
centerbacks = position_averages['centerback']
fullbacks = position_averages['fullback']
midfielders = position_averages['midfielder']
wingers = position_averages['winger']
strikers = position_averages['striker']

## grouped features

### definition

In [6]:
def create_grouped_features(df):
    """Collapse individual attribute columns into grouped feature scores.

    Most grouped features are the row-wise mean of their source columns;
    ``teamwork`` and ``player_traits`` copy ``tea`` and ``tro`` unchanged, and
    ``club_rating`` is carried through as is.

    The input frame is NOT modified: the features are built on a copy, so the
    caller's frame keeps exactly the columns it had before the call.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every source column listed in the mappings below,
        plus ``club_rating``.

    Returns
    -------
    pandas.DataFrame
        New frame with the 14 grouped features followed by ``club_rating``,
        on the same index as ``df``.

    Raises
    ------
    KeyError
        If any required source column is missing from ``df``.
    """
    # Grouped feature -> source columns averaged row-wise.
    averaged_groups = {
        # Offensive skills
        'shooting': ['fin', 'lon', 'fre', 'pen'],
        'dribbling_control': ['dri', 'fir', 'fla', 'tec'],
        'passing_vision': ['pas', 'vis', 'l th', 'cro', 'cor', 'otb'],
        # Defensive skills
        'tackling_interception': ['tck', 'mar', 'pos', 'ant'],
        'aerial_defense': ['hea', 'jum', 'aer'],
        # Physical attributes
        'speed_agility': ['acc', 'pac', 'agi'],
        'strength_stamina': ['str', 'sta', 'bal'],
        # Mental and tactical attributes
        'decision_making': ['dec', 'cmp'],
        'work_ethic_effort': ['wor', 'det', 'bra'],
        # Leadership
        'leadership': ['ldr', 'com'],
        # Goalkeeping abilities
        'goalkeeping_abilities': ['han', 'ref', 'kic', 'thr', '1v1', 'ecc', 'cmd'],
        # Behavioral attributes
        'behavioral_attributes': ['agg', 'pun'],
    }
    # Grouped feature -> single source column copied without averaging.
    copied_columns = {
        'teamwork': 'tea',
        'player_traits': 'tro',
    }
    # Column order of the returned frame.
    output_columns = [
        'shooting', 'dribbling_control', 'passing_vision', 'tackling_interception',
        'aerial_defense', 'speed_agility', 'strength_stamina', 'decision_making',
        'work_ethic_effort', 'leadership', 'teamwork', 'goalkeeping_abilities',
        'behavioral_attributes', 'player_traits', 'club_rating',
    ]

    new_columns = {
        feature: df[sources].mean(axis=1)
        for feature, sources in averaged_groups.items()
    }
    for feature, source in copied_columns.items():
        new_columns[feature] = df[source]

    # assign() returns a new frame, so the caller's df is left untouched.
    return df.assign(**new_columns)[output_columns]

### Per-position DataFrames

In [7]:
# Replace each per-position frame of club averages with its grouped-feature version.
# fullbacks is converted too so that all six position frames share the same
# columns; left unconverted it would lack 'goalkeeping_abilities' and could not
# be passed to compare_teams.
# NOTE(review): re-running this cell without re-running the cell that builds the
# frames raises KeyError, because the raw attribute columns are gone afterwards.
goalkeepers = create_grouped_features(goalkeepers)
centerbacks = create_grouped_features(centerbacks)
fullbacks = create_grouped_features(fullbacks)
midfielders = create_grouped_features(midfielders)
wingers = create_grouped_features(wingers)
strikers = create_grouped_features(strikers)

In [9]:
def custom_scaler(dfgf_no_name):
    """Divide every value by the sum of its row.

    Parameters
    ----------
    dfgf_no_name : pandas.DataFrame
        Numeric frame to scale.

    Returns
    -------
    pandas.DataFrame
        New frame of the same shape in which each row has been divided by
        that row's total. The input frame is not modified.

    Notes
    -----
    A row whose total is 0 is divided by zero; pandas then yields inf/NaN
    instead of raising.
    """
    row_totals = dfgf_no_name.sum(axis="columns")
    return dfgf_no_name.divide(row_totals, axis="index")

In [14]:
def compare_teams(df, team1, team2):
    """Return the row-scaled feature difference between two teams.

    The row for ``team2`` is subtracted from the row for ``team1``, the
    ``club_rating`` and ``goalkeeping_abilities`` columns are dropped, and the
    remaining differences are passed through ``custom_scaler`` (each value
    divided by the sum of the row).

    Parameters
    ----------
    df : pandas.DataFrame
        Grouped-feature frame indexed by team name.
    team1, team2 : str
        Index labels of the two teams to compare.

    Returns
    -------
    pandas.DataFrame
        Single-row frame of scaled differences.

    Notes
    -----
    NOTE(review): because the differences are divided by their own sum,
    swapping ``team1`` and ``team2`` negates both numerator and denominator
    and therefore yields the same output -- confirm this is intended.
    """
    excluded = ['club_rating', 'goalkeeping_abilities']
    gap = df.loc[team1] - df.loc[team2]
    # Turn the difference Series into a one-row frame before scaling.
    gap_row = gap.to_frame().T
    return custom_scaler(gap_row.drop(columns=excluded))

### example

In [15]:
# Scaled midfielder feature difference between Man City and Club Brugge.
Nino = compare_teams(midfielders, 'Man City', 'Club Brugge')

In [16]:
# Display the resulting single-row frame.
Nino

Unnamed: 0,shooting,dribbling_control,passing_vision,tackling_interception,aerial_defense,speed_agility,strength_stamina,decision_making,work_ethic_effort,leadership,teamwork,behavioral_attributes,player_traits
0,0.146484,0.205078,0.123047,0.099609,-0.046875,0.191406,0.140625,0.158203,0.046875,-0.035156,0.035156,-0.087891,0.023438


# Part 2