In [175]:
import pandas as pd
import numpy as np
from scipy.stats import zscore
from datetime import datetime

In [177]:
# Sett maksimal visning av kolonner til None (ingen grense)
pd.set_option('display.max_columns', None)

In [179]:
path= r"C:\Users\henri\Desktop\Fotballdata\wyscout\Wyscout_League_Export-15-07-25.csv"
df = pd.read_csv(path)

In [181]:
# Print all unique values in the "Leagues" column
unique_leagues_sorted = sorted(df['League'].unique())
print(unique_leagues_sorted)

['Albania Superiore League 2023-24', 'Albania Superiore League 2024-25', 'Argentina Primera Division 2024', 'Argentina Primera Division 2025', 'Australia A-League 2023-24', 'Australia A-League 2024-25', 'Austria Bundesliga 2023-24', 'Austria Bundesliga 2024-25', 'Belgium First Division A 2023-24', 'Belgium First Division A 2024-25', 'Belgium First Division B 2023-24', 'Belgium First Division B 2024-25', 'Bolivia Primera Division 2024', 'Bolivia Primera Division 2025', 'Bosnia Premijer Liga 2023-24', 'Bosnia Premijer Liga 2024-25', 'Brazil Serie A 2024', 'Brazil Serie A 2025', 'Brazil Serie B 2024', 'Brazil Serie B 2025', 'Bulgaria First League 2023-24', 'Bulgaria First League 2024-25', 'Canada Premier League 2024', 'Canada Premier League 2025', 'Chile Primera Division 2024', 'Chile Primera Division 2025', 'China Super League 2024', 'China Super League 2025', 'Colombia Primera A 2024', 'Colombia Primera A 2025', 'Costa Rica Primera Division 2023-24', 'Costa Rica Primera Division 2024-25

In [129]:
col_list = df.columns.to_list()
print(col_list) #print columns

['Unnamed: 0', 'Player', 'Team', 'Team within selected timeframe', 'Position', 'Age', 'Market value', 'Contract expires', 'Matches played', 'Minutes played', 'Goals', 'xG', 'Assists', 'xA', 'Duels per 90', 'Duels won, %', 'Birth country', 'Passport country', 'Foot', 'Height', 'Weight', 'On loan', 'Successful defensive actions per 90', 'Defensive duels per 90', 'Defensive duels won, %', 'Aerial duels per 90', 'Aerial duels won, %', 'Sliding tackles per 90', 'PAdj Sliding tackles', 'Shots blocked per 90', 'Interceptions per 90', 'PAdj Interceptions', 'Fouls per 90', 'Yellow cards', 'Yellow cards per 90', 'Red cards', 'Red cards per 90', 'Successful attacking actions per 90', 'Goals per 90', 'Non-penalty goals', 'Non-penalty goals per 90', 'xG per 90', 'Head goals', 'Head goals per 90', 'Shots', 'Shots per 90', 'Shots on target, %', 'Goal conversion, %', 'Assists per 90', 'Crosses per 90', 'Accurate crosses, %', 'Crosses from left flank per 90', 'Accurate crosses from left flank, %', 'Cro

In [183]:
import re
class Preprocessing:
    def __init__(self, df, leagues=None, positions=None, min_minutes=None):
        """
        df: DataFrame som skal filtreres.
        leagues: Liste over ligaer som skal inkluderes. Standard er 'England Premier League 2024-25'.
        positions: Liste over posisjoner som skal inkluderes. Standard er ['RB', 'LB', 'RWB', 'LWB'].
        min_minutes: Minimum antall minutter spilt for å bli inkludert i datasettet.
        """
        self.df = df
        self.leagues = leagues if leagues else ['England Premier League 2024-25']
        #self.positions = positions if positions else ['RB', 'LB', 'RWB', 'LWB']
        self.min_minutes = min_minutes
        self.filtered_df = None
    @staticmethod
    def extract_season_from_league(league):
        match = re.search(r"(\d{4})-(\d{2})|(\d{4})", str(league))
        if match:
            if match.group(1) and match.group(2):
                return f"{match.group(1)}-{match.group(2)}"
            return match.group(3)
        return None
    def filter_data(self):
        """Filtrerer DataFrame basert på angitte kriterier."""
        df = self.df.copy()

        # Endre kolonnenavn
        df = df.rename(columns={'Team within selected timeframe': 'squad'})
        df = df.rename(columns={'Team': 'club today'})
        df.columns = df.columns.str.lower()

        # Filtrer basert på liga(er)
        df = df[df['league'].isin(self.leagues)]
        df['season'] = df['league'].apply(self.extract_season_from_league)
        
        # Konverter 'contract expires' til datetime og beregn måneder igjen
        df['contract expires'] = pd.to_datetime(df['contract expires'], errors='coerce')
        today = datetime.today()

        df['contract months left'] = df['contract expires'].apply(
        lambda x: 0 if pd.isna(x) else max(0, int((x.year - today.year) * 12 + (x.month - today.month))))

        # Liste over relevante kolonner
        columns_to_keep = [
            'player', 'club today', 'squad','league', 'position', 'age', 'market value', 'contract months left', 
            'matches played', 'minutes played', 'goals', 'xg', 'assists', 'xa', 
            'duels per 90', 'duels won, %', 'birth country', 'passport country', 'foot', 'height', 
            'weight', 'on loan', 'successful defensive actions per 90', 'defensive duels per 90', 
            'defensive duels won, %', 'aerial duels per 90', 'aerial duels won, %', 'sliding tackles per 90', 
            'padj sliding tackles', 'shots blocked per 90', 'interceptions per 90', 'padj interceptions', 
            'fouls per 90', 'yellow cards', 'yellow cards per 90', 'red cards', 'red cards per 90', 
            'successful attacking actions per 90', 'goals per 90', 'non-penalty goals', 'non-penalty goals per 90', 
            'xg per 90', 'head goals', 'head goals per 90', 'shots', 'shots per 90', 'shots on target, %', 
            'goal conversion, %', 'assists per 90', 'crosses per 90', 'accurate crosses, %', 
            'crosses from left flank per 90', 'accurate crosses from left flank, %', 
            'crosses from right flank per 90', 'accurate crosses from right flank, %', 
            'crosses to goalie box per 90', 'dribbles per 90', 'successful dribbles, %', 
            'offensive duels per 90', 'offensive duels won, %', 'touches in box per 90', 
            'progressive runs per 90', 'accelerations per 90', 'received passes per 90', 
            'received long passes per 90', 'fouls suffered per 90', 'passes per 90', 'accurate passes, %', 
            'forward passes per 90', 'accurate forward passes, %', 'back passes per 90', 
            'accurate back passes, %', 'lateral passes per 90', 'accurate lateral passes, %', 
            'short / medium passes per 90', 'accurate short / medium passes, %', 'long passes per 90', 
            'accurate long passes, %', 'average pass length, m', 'average long pass length, m', 
            'xa per 90', 'shot assists per 90', 'second assists per 90', 'third assists per 90', 
            'smart passes per 90', 'accurate smart passes, %', 'key passes per 90', 
            'passes to final third per 90', 'accurate passes to final third, %', 
            'passes to penalty area per 90', 'accurate passes to penalty area, %', 
            'through passes per 90', 'accurate through passes, %', 'deep completions per 90', 
            'deep completed crosses per 90', 'progressive passes per 90', 'accurate progressive passes, %', 
            'main position', 'season',
        ]
        
        # Behold kun relevante kolonner
        df = df[columns_to_keep]

        # Filtrer basert på posisjoner
        #df = df[df['main position'].isin(self.positions)]

        # Filtrer basert på minimum minutter spilt
        #df = df[df['minutes played'] > self.min_minutes]
        df['age'] = df['age'].fillna(-1).astype(int)


        self.filtered_df = df
        return self.filtered_df
        
leagues = ['Spain La Liga 2024-25', 'Italy Serie A 2024-25', 'France Ligue 1 2024-25', 'Germany Bundesliga 2024-25', 'England Premier League 2024-25','Belgium First Division A 2024-25','Netherlands Eredivisie 2024-25','Portugal Primeira Liga 2024-25']
positions=None # can be adjusted
#min_minutes=900  # can be adjusted
preprocessor = Preprocessing(df, leagues)
filtered_df = preprocessor.filter_data()

In [191]:
filtered_df.head()

Unnamed: 0,player,club today,squad,league,position,age,market value,contract months left,matches played,minutes played,goals,xg,assists,xa,duels per 90,"duels won, %",birth country,passport country,foot,height,weight,on loan,successful defensive actions per 90,defensive duels per 90,"defensive duels won, %",aerial duels per 90,"aerial duels won, %",sliding tackles per 90,padj sliding tackles,shots blocked per 90,interceptions per 90,padj interceptions,fouls per 90,yellow cards,yellow cards per 90,red cards,red cards per 90,successful attacking actions per 90,goals per 90,non-penalty goals,non-penalty goals per 90,xg per 90,head goals,head goals per 90,shots,shots per 90,"shots on target, %","goal conversion, %",assists per 90,crosses per 90,"accurate crosses, %",crosses from left flank per 90,"accurate crosses from left flank, %",crosses from right flank per 90,"accurate crosses from right flank, %",crosses to goalie box per 90,dribbles per 90,"successful dribbles, %",offensive duels per 90,"offensive duels won, %",touches in box per 90,progressive runs per 90,accelerations per 90,received passes per 90,received long passes per 90,fouls suffered per 90,passes per 90,"accurate passes, %",forward passes per 90,"accurate forward passes, %",back passes per 90,"accurate back passes, %",lateral passes per 90,"accurate lateral passes, %",short / medium passes per 90,"accurate short / medium passes, %",long passes per 90,"accurate long passes, %","average pass length, m","average long pass length, m",xa per 90,shot assists per 90,second assists per 90,third assists per 90,smart passes per 90,"accurate smart passes, %",key passes per 90,passes to final third per 90,"accurate passes to final third, %",passes to penalty area per 90,"accurate passes to penalty area, %",through passes per 90,"accurate through passes, %",deep completions per 90,deep completed crosses per 90,progressive passes per 90,"accurate progressive passes, %",main position,season
0,B. van Rooij,Twente,Twente,Netherlands Eredivisie 2024-25,RB,24,2000000,31,36,3486,1,1.63,6,3.74,14.35,58.45,Netherlands,Netherlands,right,174,72,no,8.57,6.12,70.46,2.58,49.0,0.13,0.17,0.23,4.13,5.46,0.65,1,0.03,0,0.0,2.35,0.03,1,0.03,0.04,0,0.0,23,0.59,21.74,4.348,0.15,3.18,39.02,0.05,0.0,3.1,39.17,0.62,1.37,69.81,3.61,51.43,0.75,1.81,0.59,33.1,0.96,0.83,47.19,80.8,16.73,72.07,6.92,95.9,17.4,83.53,40.02,86.84,4.1,53.46,19.97,35.45,0.1,0.72,0.05,0.03,0.03,100.0,0.41,5.5,73.24,3.3,46.88,0.28,27.27,0.75,1.21,11.02,70.96,RB,2024-25
1,N. Olij,Sparta Rotterdam,Sparta Rotterdam,Netherlands Eredivisie 2024-25,GK,29,4000000,31,34,3396,0,0.0,1,0.61,0.85,87.5,Netherlands,Netherlands,right,185,74,no,1.25,0.05,0.0,0.64,95.83,0.0,0.0,0.0,1.25,1.48,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,100.0,0.0,0.0,0.0,22.5,0.16,0.08,32.04,88.83,16.88,80.85,0.03,100.0,13.83,98.28,20.11,98.95,11.87,72.1,31.57,50.29,0.02,0.05,0.0,0.0,0.0,0.0,0.05,3.79,59.44,0.34,15.38,0.37,57.14,0.08,0.0,9.7,83.88,GK,2024-25
2,F. de Keijzer,Heracles,Heracles,Netherlands Eredivisie 2024-25,GK,25,500000,19,34,3353,0,0.0,0,0.53,0.67,68.0,Netherlands,Netherlands,right,193,83,no,1.07,0.16,33.33,0.32,91.67,0.0,0.0,0.0,1.02,1.22,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,50.0,0.0,0.0,0.0,18.2,0.48,0.16,23.7,80.41,15.57,72.24,0.08,100.0,7.52,96.79,10.84,98.27,12.8,65.62,38.78,53.43,0.01,0.03,0.03,0.0,0.0,0.0,0.03,4.7,60.57,0.21,25.0,0.27,40.0,0.05,0.0,10.01,77.75,GK,2024-25
3,T. Didillon-Hödl,Willem II,Willem II,Netherlands Eredivisie 2024-25,GK,29,500000,7,34,3353,0,0.0,0,0.0,0.56,85.71,France,France,left,193,84,no,1.07,0.08,33.33,0.38,100.0,0.0,0.0,0.0,1.05,1.13,0.05,2,0.05,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,100.0,0.0,0.0,0.0,14.04,0.19,0.03,20.43,82.26,12.96,73.29,0.0,0.0,6.68,98.8,10.09,97.07,10.25,68.32,35.18,49.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.99,55.41,0.03,0.0,0.08,33.33,0.0,0.0,8.88,73.11,GK,2024-25
4,M. Deijl,Go Ahead Eagles,Go Ahead Eagles,Netherlands Eredivisie 2024-25,RB,27,1200000,19,34,3337,4,4.81,2,2.24,13.03,55.49,Netherlands,Netherlands,right,181,72,no,6.47,5.56,57.77,2.62,48.45,0.16,0.21,0.35,3.1,3.93,1.0,2,0.05,0,0.0,1.24,0.11,2,0.05,0.13,0,0.0,31,0.84,41.94,12.903,0.05,1.7,22.22,0.03,100.0,1.65,21.31,0.35,0.67,72.0,2.72,59.41,1.51,0.76,0.11,26.89,1.62,1.73,35.06,76.92,14.51,64.31,6.26,96.55,10.38,83.64,28.16,86.88,5.31,40.61,21.54,39.7,0.06,0.3,0.0,0.0,0.05,50.0,0.16,6.26,60.78,1.83,32.35,0.35,38.46,0.46,0.43,6.88,63.92,RB,2024-25


In [132]:
filtered_df.league.unique()

array(['Netherlands Eredivisie 2024-25', 'Germany Bundesliga 2024-25',
       'Italy Serie A 2024-25', 'Spain La Liga 2024-25',
       'Portugal Primeira Liga 2024-25', 'France Ligue 1 2024-25',
       'England Premier League 2024-25',
       'Belgium First Division A 2024-25'], dtype=object)

In [133]:
filtered_df['main position'].unique()

array(['RB', 'GK', 'LCB', 'RCB', 'LB', 'RDMF', 'RCMF', 'AMF', 'LAMF',
       'CF', 'LDMF', 'LW', 'LCMF', 'RAMF', 'RW', 'DMF', 'RWF', 'LWF',
       'CB', 'LWB', 'RWB'], dtype=object)

In [134]:
filtered_df.squad.unique()

array(['Twente', 'Sparta Rotterdam', 'Heracles', 'Willem II',
       'Go Ahead Eagles', 'PEC Zwolle', 'Almere City', 'NEC', 'PSV',
       'Utrecht', 'AZ', 'NAC Breda', 'Feyenoord', 'Groningen',
       'Fortuna Sittard', 'RKC Waalwijk', 'Heerenveen', 'Ajax',
       'Bayern München', 'Bayer Leverkusen', 'RB Leipzig',
       'Borussia Dortmund', 'Eintracht Frankfurt', 'Stuttgart',
       'Wolfsburg', 'Freiburg', 'Union Berlin', 'Hoffenheim',
       "Borussia M'gladbach", 'Mainz 05', 'Augsburg', 'Werder Bremen',
       'Heidenheim', 'Bochum', 'St. Pauli', 'Holstein Kiel', 'Lecce',
       'Napoli', 'Roma', 'Torino', 'Parma', 'Milan', 'Lazio', 'Cagliari',
       'Hellas Verona', 'Bologna', 'Empoli', 'Udinese', 'Fiorentina',
       'Venezia', 'Genoa', 'Atalanta', 'Internazionale', 'Juventus',
       'Monza', 'Como', 'Espanyol', 'Getafe', 'Rayo Vallecano', 'Osasuna',
       'Valencia', 'Atlético Madrid', 'Real Sociedad', 'Mallorca',
       'Girona', 'Deportivo Alavés', 'Sevilla', 'Real Madrid'

In [135]:
len(filtered_df)

3934

In [136]:
col_list = filtered_df.columns.to_list()
print(col_list) #print the columns 

['player', 'club today', 'squad', 'league', 'position', 'age', 'market value', 'contract months left', 'matches played', 'minutes played', 'goals', 'xg', 'assists', 'xa', 'duels per 90', 'duels won, %', 'birth country', 'passport country', 'foot', 'height', 'weight', 'on loan', 'successful defensive actions per 90', 'defensive duels per 90', 'defensive duels won, %', 'aerial duels per 90', 'aerial duels won, %', 'sliding tackles per 90', 'padj sliding tackles', 'shots blocked per 90', 'interceptions per 90', 'padj interceptions', 'fouls per 90', 'yellow cards', 'yellow cards per 90', 'red cards', 'red cards per 90', 'successful attacking actions per 90', 'goals per 90', 'non-penalty goals', 'non-penalty goals per 90', 'xg per 90', 'head goals', 'head goals per 90', 'shots', 'shots per 90', 'shots on target, %', 'goal conversion, %', 'assists per 90', 'crosses per 90', 'accurate crosses, %', 'crosses from left flank per 90', 'accurate crosses from left flank, %', 'crosses from right fla

In [137]:
print(filtered_df.isna().sum())

player                              0
club today                         13
squad                               0
league                              0
position                            0
                                 ... 
deep completed crosses per 90     106
progressive passes per 90         106
accurate progressive passes, %    106
main position                       0
season                              0
Length: 99, dtype: int64


In [138]:
# Finn kolonnene som har NaN-verdier
nan_columns = filtered_df.columns[filtered_df.isna().sum() > 0]

# Skriv ut navnene på kolonnene med NaN-verdier
print("Kolonner med NaN-verdier:")
print(nan_columns)

Kolonner med NaN-verdier:
Index(['club today', 'duels per 90', 'duels won, %', 'birth country',
       'passport country', 'foot', 'successful defensive actions per 90',
       'defensive duels per 90', 'defensive duels won, %',
       'aerial duels per 90', 'aerial duels won, %', 'sliding tackles per 90',
       'shots blocked per 90', 'interceptions per 90', 'fouls per 90',
       'yellow cards per 90', 'red cards per 90',
       'successful attacking actions per 90', 'goals per 90',
       'non-penalty goals per 90', 'xg per 90', 'head goals per 90',
       'shots per 90', 'shots on target, %', 'assists per 90',
       'crosses per 90', 'accurate crosses, %',
       'crosses from left flank per 90', 'accurate crosses from left flank, %',
       'crosses from right flank per 90',
       'accurate crosses from right flank, %', 'crosses to goalie box per 90',
       'dribbles per 90', 'successful dribbles, %', 'offensive duels per 90',
       'offensive duels won, %', 'touches in box p

In [139]:
filtered_df.head()

Unnamed: 0,player,club today,squad,league,position,age,market value,contract months left,matches played,minutes played,goals,xg,assists,xa,duels per 90,"duels won, %",birth country,passport country,foot,height,weight,on loan,successful defensive actions per 90,defensive duels per 90,"defensive duels won, %",aerial duels per 90,"aerial duels won, %",sliding tackles per 90,padj sliding tackles,shots blocked per 90,interceptions per 90,padj interceptions,fouls per 90,yellow cards,yellow cards per 90,red cards,red cards per 90,successful attacking actions per 90,goals per 90,non-penalty goals,non-penalty goals per 90,xg per 90,head goals,head goals per 90,shots,shots per 90,"shots on target, %","goal conversion, %",assists per 90,crosses per 90,"accurate crosses, %",crosses from left flank per 90,"accurate crosses from left flank, %",crosses from right flank per 90,"accurate crosses from right flank, %",crosses to goalie box per 90,dribbles per 90,"successful dribbles, %",offensive duels per 90,"offensive duels won, %",touches in box per 90,progressive runs per 90,accelerations per 90,received passes per 90,received long passes per 90,fouls suffered per 90,passes per 90,"accurate passes, %",forward passes per 90,"accurate forward passes, %",back passes per 90,"accurate back passes, %",lateral passes per 90,"accurate lateral passes, %",short / medium passes per 90,"accurate short / medium passes, %",long passes per 90,"accurate long passes, %","average pass length, m","average long pass length, m",xa per 90,shot assists per 90,second assists per 90,third assists per 90,smart passes per 90,"accurate smart passes, %",key passes per 90,passes to final third per 90,"accurate passes to final third, %",passes to penalty area per 90,"accurate passes to penalty area, %",through passes per 90,"accurate through passes, %",deep completions per 90,deep completed crosses per 90,progressive passes per 90,"accurate progressive passes, %",main position,season
0,B. van Rooij,Twente,Twente,Netherlands Eredivisie 2024-25,RB,24,2000000,31,36,3486,1,1.63,6,3.74,14.35,58.45,Netherlands,Netherlands,right,174,72,no,8.57,6.12,70.46,2.58,49.0,0.13,0.17,0.23,4.13,5.46,0.65,1,0.03,0,0.0,2.35,0.03,1,0.03,0.04,0,0.0,23,0.59,21.74,4.348,0.15,3.18,39.02,0.05,0.0,3.1,39.17,0.62,1.37,69.81,3.61,51.43,0.75,1.81,0.59,33.1,0.96,0.83,47.19,80.8,16.73,72.07,6.92,95.9,17.4,83.53,40.02,86.84,4.1,53.46,19.97,35.45,0.1,0.72,0.05,0.03,0.03,100.0,0.41,5.5,73.24,3.3,46.88,0.28,27.27,0.75,1.21,11.02,70.96,RB,2024-25
1,N. Olij,Sparta Rotterdam,Sparta Rotterdam,Netherlands Eredivisie 2024-25,GK,29,4000000,31,34,3396,0,0.0,1,0.61,0.85,87.5,Netherlands,Netherlands,right,185,74,no,1.25,0.05,0.0,0.64,95.83,0.0,0.0,0.0,1.25,1.48,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,100.0,0.0,0.0,0.0,22.5,0.16,0.08,32.04,88.83,16.88,80.85,0.03,100.0,13.83,98.28,20.11,98.95,11.87,72.1,31.57,50.29,0.02,0.05,0.0,0.0,0.0,0.0,0.05,3.79,59.44,0.34,15.38,0.37,57.14,0.08,0.0,9.7,83.88,GK,2024-25
2,F. de Keijzer,Heracles,Heracles,Netherlands Eredivisie 2024-25,GK,25,500000,19,34,3353,0,0.0,0,0.53,0.67,68.0,Netherlands,Netherlands,right,193,83,no,1.07,0.16,33.33,0.32,91.67,0.0,0.0,0.0,1.02,1.22,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,50.0,0.0,0.0,0.0,18.2,0.48,0.16,23.7,80.41,15.57,72.24,0.08,100.0,7.52,96.79,10.84,98.27,12.8,65.62,38.78,53.43,0.01,0.03,0.03,0.0,0.0,0.0,0.03,4.7,60.57,0.21,25.0,0.27,40.0,0.05,0.0,10.01,77.75,GK,2024-25
3,T. Didillon-Hödl,Willem II,Willem II,Netherlands Eredivisie 2024-25,GK,29,500000,7,34,3353,0,0.0,0,0.0,0.56,85.71,France,France,left,193,84,no,1.07,0.08,33.33,0.38,100.0,0.0,0.0,0.0,1.05,1.13,0.05,2,0.05,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,100.0,0.0,0.0,0.0,14.04,0.19,0.03,20.43,82.26,12.96,73.29,0.0,0.0,6.68,98.8,10.09,97.07,10.25,68.32,35.18,49.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.99,55.41,0.03,0.0,0.08,33.33,0.0,0.0,8.88,73.11,GK,2024-25
4,M. Deijl,Go Ahead Eagles,Go Ahead Eagles,Netherlands Eredivisie 2024-25,RB,27,1200000,19,34,3337,4,4.81,2,2.24,13.03,55.49,Netherlands,Netherlands,right,181,72,no,6.47,5.56,57.77,2.62,48.45,0.16,0.21,0.35,3.1,3.93,1.0,2,0.05,0,0.0,1.24,0.11,2,0.05,0.13,0,0.0,31,0.84,41.94,12.903,0.05,1.7,22.22,0.03,100.0,1.65,21.31,0.35,0.67,72.0,2.72,59.41,1.51,0.76,0.11,26.89,1.62,1.73,35.06,76.92,14.51,64.31,6.26,96.55,10.38,83.64,28.16,86.88,5.31,40.61,21.54,39.7,0.06,0.3,0.0,0.0,0.05,50.0,0.16,6.26,60.78,1.83,32.35,0.35,38.46,0.46,0.43,6.88,63.92,RB,2024-25


In [140]:
filtered_df.season.unique()

array(['2024-25'], dtype=object)

In [141]:
filtered_df.head()

Unnamed: 0,player,club today,squad,league,position,age,market value,contract months left,matches played,minutes played,goals,xg,assists,xa,duels per 90,"duels won, %",birth country,passport country,foot,height,weight,on loan,successful defensive actions per 90,defensive duels per 90,"defensive duels won, %",aerial duels per 90,"aerial duels won, %",sliding tackles per 90,padj sliding tackles,shots blocked per 90,interceptions per 90,padj interceptions,fouls per 90,yellow cards,yellow cards per 90,red cards,red cards per 90,successful attacking actions per 90,goals per 90,non-penalty goals,non-penalty goals per 90,xg per 90,head goals,head goals per 90,shots,shots per 90,"shots on target, %","goal conversion, %",assists per 90,crosses per 90,"accurate crosses, %",crosses from left flank per 90,"accurate crosses from left flank, %",crosses from right flank per 90,"accurate crosses from right flank, %",crosses to goalie box per 90,dribbles per 90,"successful dribbles, %",offensive duels per 90,"offensive duels won, %",touches in box per 90,progressive runs per 90,accelerations per 90,received passes per 90,received long passes per 90,fouls suffered per 90,passes per 90,"accurate passes, %",forward passes per 90,"accurate forward passes, %",back passes per 90,"accurate back passes, %",lateral passes per 90,"accurate lateral passes, %",short / medium passes per 90,"accurate short / medium passes, %",long passes per 90,"accurate long passes, %","average pass length, m","average long pass length, m",xa per 90,shot assists per 90,second assists per 90,third assists per 90,smart passes per 90,"accurate smart passes, %",key passes per 90,passes to final third per 90,"accurate passes to final third, %",passes to penalty area per 90,"accurate passes to penalty area, %",through passes per 90,"accurate through passes, %",deep completions per 90,deep completed crosses per 90,progressive passes per 90,"accurate progressive passes, %",main position,season
0,B. van Rooij,Twente,Twente,Netherlands Eredivisie 2024-25,RB,24,2000000,31,36,3486,1,1.63,6,3.74,14.35,58.45,Netherlands,Netherlands,right,174,72,no,8.57,6.12,70.46,2.58,49.0,0.13,0.17,0.23,4.13,5.46,0.65,1,0.03,0,0.0,2.35,0.03,1,0.03,0.04,0,0.0,23,0.59,21.74,4.348,0.15,3.18,39.02,0.05,0.0,3.1,39.17,0.62,1.37,69.81,3.61,51.43,0.75,1.81,0.59,33.1,0.96,0.83,47.19,80.8,16.73,72.07,6.92,95.9,17.4,83.53,40.02,86.84,4.1,53.46,19.97,35.45,0.1,0.72,0.05,0.03,0.03,100.0,0.41,5.5,73.24,3.3,46.88,0.28,27.27,0.75,1.21,11.02,70.96,RB,2024-25
1,N. Olij,Sparta Rotterdam,Sparta Rotterdam,Netherlands Eredivisie 2024-25,GK,29,4000000,31,34,3396,0,0.0,1,0.61,0.85,87.5,Netherlands,Netherlands,right,185,74,no,1.25,0.05,0.0,0.64,95.83,0.0,0.0,0.0,1.25,1.48,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,100.0,0.0,0.0,0.0,22.5,0.16,0.08,32.04,88.83,16.88,80.85,0.03,100.0,13.83,98.28,20.11,98.95,11.87,72.1,31.57,50.29,0.02,0.05,0.0,0.0,0.0,0.0,0.05,3.79,59.44,0.34,15.38,0.37,57.14,0.08,0.0,9.7,83.88,GK,2024-25
2,F. de Keijzer,Heracles,Heracles,Netherlands Eredivisie 2024-25,GK,25,500000,19,34,3353,0,0.0,0,0.53,0.67,68.0,Netherlands,Netherlands,right,193,83,no,1.07,0.16,33.33,0.32,91.67,0.0,0.0,0.0,1.02,1.22,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,50.0,0.0,0.0,0.0,18.2,0.48,0.16,23.7,80.41,15.57,72.24,0.08,100.0,7.52,96.79,10.84,98.27,12.8,65.62,38.78,53.43,0.01,0.03,0.03,0.0,0.0,0.0,0.03,4.7,60.57,0.21,25.0,0.27,40.0,0.05,0.0,10.01,77.75,GK,2024-25
3,T. Didillon-Hödl,Willem II,Willem II,Netherlands Eredivisie 2024-25,GK,29,500000,7,34,3353,0,0.0,0,0.0,0.56,85.71,France,France,left,193,84,no,1.07,0.08,33.33,0.38,100.0,0.0,0.0,0.0,1.05,1.13,0.05,2,0.05,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,100.0,0.0,0.0,0.0,14.04,0.19,0.03,20.43,82.26,12.96,73.29,0.0,0.0,6.68,98.8,10.09,97.07,10.25,68.32,35.18,49.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.99,55.41,0.03,0.0,0.08,33.33,0.0,0.0,8.88,73.11,GK,2024-25
4,M. Deijl,Go Ahead Eagles,Go Ahead Eagles,Netherlands Eredivisie 2024-25,RB,27,1200000,19,34,3337,4,4.81,2,2.24,13.03,55.49,Netherlands,Netherlands,right,181,72,no,6.47,5.56,57.77,2.62,48.45,0.16,0.21,0.35,3.1,3.93,1.0,2,0.05,0,0.0,1.24,0.11,2,0.05,0.13,0,0.0,31,0.84,41.94,12.903,0.05,1.7,22.22,0.03,100.0,1.65,21.31,0.35,0.67,72.0,2.72,59.41,1.51,0.76,0.11,26.89,1.62,1.73,35.06,76.92,14.51,64.31,6.26,96.55,10.38,83.64,28.16,86.88,5.31,40.61,21.54,39.7,0.06,0.3,0.0,0.0,0.05,50.0,0.16,6.26,60.78,1.83,32.35,0.35,38.46,0.46,0.43,6.88,63.92,RB,2024-25


In [142]:
filtered_df['main position'].unique()

array(['RB', 'GK', 'LCB', 'RCB', 'LB', 'RDMF', 'RCMF', 'AMF', 'LAMF',
       'CF', 'LDMF', 'LW', 'LCMF', 'RAMF', 'RW', 'DMF', 'RWF', 'LWF',
       'CB', 'LWB', 'RWB'], dtype=object)

In [143]:
filtered_df['main position'].value_counts()

main position
CF      572
RCB     325
LCB     311
GK      307
RB      287
LB      270
LCMF    227
AMF     225
RCMF    198
LAMF    163
RAMF    148
RDMF    128
LDMF    123
RW       97
CB       97
LW       89
LWB      84
DMF      83
RWB      78
RWF      65
LWF      57
Name: count, dtype: int64

In [193]:
from fuzzywuzzy import process
from datetime import datetime

class RetrievePossession:
    def __init__(self, path):
        """Leser inn data og forbereder den for matching."""
        self.df_poss = pd.read_csv(path, encoding="ISO-8859-1")
        self.df_poss.columns = self.df_poss.columns.str.lower()
        self.df_poss['poss_decimal'] = self.df_poss['possession'] / 100 

    def match_teams(self, team_name, possibilities, threshold=80):
        """Bruker fuzzy matching for å finne nærmeste match for hvert lag."""
        match = process.extractOne(team_name, possibilities)
        if match and match[1] >= threshold:
            return match[0]
        return team_name  # Returnerer originalnavn hvis ingen match finnes

    def merge_possession_data(self, processed_df):
        """
        Matcher lag i df med df_poss, og legger til poss_decimal.
        Returnerer en ny DataFrame.
        """
        poss_teams = self.df_poss['squad'].unique()

        # Bruk fuzzy matching på lagene i df
        processed_df['squad_matched'] = processed_df['squad'].apply(lambda x: self.match_teams(x, poss_teams))

        # Merge basert på den matchede 'squad'
        merged_df = pd.merge(processed_df, self.df_poss[['squad', 'poss_decimal']], 
                             left_on='squad_matched', right_on='squad', how='left')

        # Fjern den midlertidige 'squad_matched'
        merged_df = merged_df.drop(columns=['squad_matched', 'squad_y'])

        # Endre navnet på 'squad_x' til 'squad'
        merged_df = merged_df.rename(columns={'squad_x': 'squad'})

        return merged_df

In [195]:
# Filsti til possession-data
path = r"\\wsl.localhost\Ubuntu\home\henrik\Yamal Project\Output-FBref-possession-big8-2024-25-03-11-2025.csv"

# Initialiser klassen
normalizer = RetrievePossession(path)

# Anta at du allerede har en filtrert DataFrame 'processed_df' med spillere
merged_df = normalizer.merge_possession_data(filtered_df)

In [197]:
class NormalizePossession:
    def __init__(self, df):
        """
        Initialiserer klassen med en kopi av DataFrame for å bevare originalen.
        """
        self.df = df.copy()  # Lag en kopi for å unngå endring av originalen

    def possession_adjust_def(self, value, team_possession):
        """Justerer en (volum)defensiv metric basert på motstanderens besittelse."""
        if pd.isnull(team_possession) or team_possession >= 1:
            return value
        opp_poss = 1 - team_possession
        return value if opp_poss == 0 else value * (0.50 / opp_poss)

    def possession_adjust_off(self, value, team_possession):
        """Justerer en (volum)offensiv metric basert på eget lags besittelse."""
        if pd.isnull(team_possession) or team_possession == 0:
            return value
        return value * (0.50 / team_possession)

    def apply_adjustments(self):
        """
        Oppretter nye PAdj-kolonner for defensive og offensive stats.
        Returnerer en oppdatert DataFrame med justerte verdier.
        """
        def_def_cols = [
            'defensive duels per 90', 'interceptions per 90', 'fouls per 90',
            'aerial duels per 90', # Her kan du legge til flere defensive metrics
        ]

        def_off_cols = [
            'shots per 90',
            'crosses per 90', 'dribbles per 90', 'offensive duels per 90',
            'touches in box per 90', 'progressive runs per 90', 'accelerations per 90',
            'received passes per 90', 'received long passes per 90', 'fouls suffered per 90',
            'passes per 90', 'forward passes per 90', 'back passes per 90',
            'passes to final third per 90', 'progressive passes per 90',
             'crosses per 90','lateral passes per 90', 'short / medium passes per 90', 
            'long passes per 90','shot assists per 90', 
            'third assists per 90', 'key passes per 90',  'passes to penalty area per 90', 'through passes per 90',
             'deep completions per 90', 'deep completed crosses per 90', 
        ]

        # Justering av defensive metrics
        for col in def_def_cols:
            if col in self.df.columns:
                self.df[f"adj_{col}"] = self.df.apply(
                    lambda row: self.possession_adjust_def(row[col], row["poss_decimal"]),
                    axis=1
                )

        # Justering av offensive metrics
        for col in def_off_cols:
            if col in self.df.columns:
                self.df[f"adj_{col}"] = self.df.apply(
                    lambda row: self.possession_adjust_off(row[col], row["poss_decimal"]),
                    axis=1
                )

        # Finn alle PAdj-kolonner og rund av til 2 desimaler
        padj_columns = [col for col in self.df.columns if col.startswith("Adj_")]
        self.df[padj_columns] = self.df[padj_columns].round(2)

        return self.df  # Returnerer den oppdaterte DataFrame

In [199]:
# Initialiser klassen med merged_df
adjuster = NormalizePossession(merged_df)

# Utfør justeringene og returner den oppdaterte DataFrame
merged_df = adjuster.apply_adjustments()

In [201]:
merged_df.head()

Unnamed: 0,player,club today,squad,league,position,age,market value,contract months left,matches played,minutes played,goals,xg,assists,xa,duels per 90,"duels won, %",birth country,passport country,foot,height,weight,on loan,successful defensive actions per 90,defensive duels per 90,"defensive duels won, %",aerial duels per 90,"aerial duels won, %",sliding tackles per 90,padj sliding tackles,shots blocked per 90,interceptions per 90,padj interceptions,fouls per 90,yellow cards,yellow cards per 90,red cards,red cards per 90,successful attacking actions per 90,goals per 90,non-penalty goals,non-penalty goals per 90,xg per 90,head goals,head goals per 90,shots,shots per 90,"shots on target, %","goal conversion, %",assists per 90,crosses per 90,"accurate crosses, %",crosses from left flank per 90,"accurate crosses from left flank, %",crosses from right flank per 90,"accurate crosses from right flank, %",crosses to goalie box per 90,dribbles per 90,"successful dribbles, %",offensive duels per 90,"offensive duels won, %",touches in box per 90,progressive runs per 90,accelerations per 90,received passes per 90,received long passes per 90,fouls suffered per 90,passes per 90,"accurate passes, %",forward passes per 90,"accurate forward passes, %",back passes per 90,"accurate back passes, %",lateral passes per 90,"accurate lateral passes, %",short / medium passes per 90,"accurate short / medium passes, %",long passes per 90,"accurate long passes, %","average pass length, m","average long pass length, m",xa per 90,shot assists per 90,second assists per 90,third assists per 90,smart passes per 90,"accurate smart passes, %",key passes per 90,passes to final third per 90,"accurate passes to final third, %",passes to penalty area per 90,"accurate passes to penalty area, %",through passes per 90,"accurate through passes, %",deep completions per 90,deep completed crosses per 90,progressive passes per 90,"accurate progressive passes, %",main position,season,poss_decimal,adj_defensive duels per 90,adj_interceptions per 90,adj_fouls per 90,adj_aerial duels per 90,adj_shots per 90,adj_crosses per 90,adj_dribbles per 90,adj_offensive duels per 90,adj_touches in box per 90,adj_progressive runs per 90,adj_accelerations per 90,adj_received passes per 90,adj_received long passes per 90,adj_fouls suffered per 90,adj_passes per 90,adj_forward passes per 90,adj_back passes per 90,adj_passes to final third per 90,adj_progressive passes per 90,adj_lateral passes per 90,adj_short / medium passes per 90,adj_long passes per 90,adj_shot assists per 90,adj_third assists per 90,adj_key passes per 90,adj_passes to penalty area per 90,adj_through passes per 90,adj_deep completions per 90,adj_deep completed crosses per 90
0,B. van Rooij,Twente,Twente,Netherlands Eredivisie 2024-25,RB,24,2000000,31,36,3486,1,1.63,6,3.74,14.35,58.45,Netherlands,Netherlands,right,174,72,no,8.57,6.12,70.46,2.58,49.0,0.13,0.17,0.23,4.13,5.46,0.65,1,0.03,0,0.0,2.35,0.03,1,0.03,0.04,0,0.0,23,0.59,21.74,4.348,0.15,3.18,39.02,0.05,0.0,3.1,39.17,0.62,1.37,69.81,3.61,51.43,0.75,1.81,0.59,33.1,0.96,0.83,47.19,80.8,16.73,72.07,6.92,95.9,17.4,83.53,40.02,86.84,4.1,53.46,19.97,35.45,0.1,0.72,0.05,0.03,0.03,100.0,0.41,5.5,73.24,3.3,46.88,0.28,27.27,0.75,1.21,11.02,70.96,RB,2024-25,0.533,6.552463,4.421842,0.695931,2.762313,0.553471,2.983114,1.285178,3.386492,0.703565,1.697936,0.553471,31.050657,0.900563,0.778612,44.268293,15.694184,6.491557,5.159475,10.337711,16.322702,37.542214,3.846154,0.675422,0.028143,0.384615,3.095685,0.262664,0.703565,1.135084
1,N. Olij,Sparta Rotterdam,Sparta Rotterdam,Netherlands Eredivisie 2024-25,GK,29,4000000,31,34,3396,0,0.0,1,0.61,0.85,87.5,Netherlands,Netherlands,right,185,74,no,1.25,0.05,0.0,0.64,95.83,0.0,0.0,0.0,1.25,1.48,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,100.0,0.0,0.0,0.0,22.5,0.16,0.08,32.04,88.83,16.88,80.85,0.03,100.0,13.83,98.28,20.11,98.95,11.87,72.1,31.57,50.29,0.02,0.05,0.0,0.0,0.0,0.0,0.05,3.79,59.44,0.34,15.38,0.37,57.14,0.08,0.0,9.7,83.88,GK,2024-25,0.489,0.048924,1.223092,0.029354,0.626223,0.0,0.0,0.0,0.051125,0.0,0.0,0.0,23.006135,0.163599,0.0818,32.760736,17.259714,0.030675,3.875256,9.9182,14.141104,20.562372,12.137014,0.051125,0.0,0.051125,0.347648,0.378323,0.0818,0.0
2,F. de Keijzer,Heracles,Heracles,Netherlands Eredivisie 2024-25,GK,25,500000,19,34,3353,0,0.0,0,0.53,0.67,68.0,Netherlands,Netherlands,right,193,83,no,1.07,0.16,33.33,0.32,91.67,0.0,0.0,0.0,1.02,1.22,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,50.0,0.0,0.0,0.0,18.2,0.48,0.16,23.7,80.41,15.57,72.24,0.08,100.0,7.52,96.79,10.84,98.27,12.8,65.62,38.78,53.43,0.01,0.03,0.03,0.0,0.0,0.0,0.03,4.7,60.57,0.21,25.0,0.27,40.0,0.05,0.0,10.01,77.75,GK,2024-25,0.454,0.14652,0.934066,0.027473,0.29304,0.0,0.0,0.0,0.055066,0.0,0.0,0.0,20.044053,0.528634,0.176211,26.101322,17.147577,0.088106,5.176211,11.024229,8.281938,11.938326,14.096916,0.03304,0.0,0.03304,0.231278,0.297357,0.055066,0.0
3,T. Didillon-Hödl,Willem II,Willem II,Netherlands Eredivisie 2024-25,GK,29,500000,7,34,3353,0,0.0,0,0.0,0.56,85.71,France,France,left,193,84,no,1.07,0.08,33.33,0.38,100.0,0.0,0.0,0.0,1.05,1.13,0.05,2,0.05,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,100.0,0.0,0.0,0.0,14.04,0.19,0.03,20.43,82.26,12.96,73.29,0.0,0.0,6.68,98.8,10.09,97.07,10.25,68.32,35.18,49.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.99,55.41,0.03,0.0,0.08,33.33,0.0,0.0,8.88,73.11,GK,2024-25,0.44,0.071429,0.9375,0.044643,0.339286,0.0,0.0,0.0,0.034091,0.0,0.0,0.0,15.954545,0.215909,0.034091,23.215909,14.727273,0.0,2.261364,10.090909,7.590909,11.465909,11.647727,0.0,0.0,0.0,0.034091,0.090909,0.0,0.0
4,M. Deijl,Go Ahead Eagles,Go Ahead Eagles,Netherlands Eredivisie 2024-25,RB,27,1200000,19,34,3337,4,4.81,2,2.24,13.03,55.49,Netherlands,Netherlands,right,181,72,no,6.47,5.56,57.77,2.62,48.45,0.16,0.21,0.35,3.1,3.93,1.0,2,0.05,0,0.0,1.24,0.11,2,0.05,0.13,0,0.0,31,0.84,41.94,12.903,0.05,1.7,22.22,0.03,100.0,1.65,21.31,0.35,0.67,72.0,2.72,59.41,1.51,0.76,0.11,26.89,1.62,1.73,35.06,76.92,14.51,64.31,6.26,96.55,10.38,83.64,28.16,86.88,5.31,40.61,21.54,39.7,0.06,0.3,0.0,0.0,0.05,50.0,0.16,6.26,60.78,1.83,32.35,0.35,38.46,0.46,0.43,6.88,63.92,RB,2024-25,0.514,5.720165,3.1893,1.028807,2.695473,0.817121,1.653696,0.651751,2.645914,1.468872,0.7393,0.107004,26.157588,1.575875,1.682879,34.105058,14.114786,6.089494,6.089494,6.692607,10.097276,27.392996,5.16537,0.291829,0.0,0.155642,1.780156,0.340467,0.447471,0.418288


In [207]:
col_list = merged_df.columns.to_list()
print(col_list) #print the columns 

['player', 'club today', 'squad', 'league', 'position', 'age', 'market value', 'contract months left', 'matches played', 'minutes played', 'goals', 'xg', 'assists', 'xa', 'duels per 90', 'duels won, %', 'birth country', 'passport country', 'foot', 'height', 'weight', 'on loan', 'successful defensive actions per 90', 'defensive duels per 90', 'defensive duels won, %', 'aerial duels per 90', 'aerial duels won, %', 'sliding tackles per 90', 'padj sliding tackles', 'shots blocked per 90', 'interceptions per 90', 'padj interceptions', 'fouls per 90', 'yellow cards', 'yellow cards per 90', 'red cards', 'red cards per 90', 'successful attacking actions per 90', 'goals per 90', 'non-penalty goals', 'non-penalty goals per 90', 'xg per 90', 'head goals', 'head goals per 90', 'shots', 'shots per 90', 'shots on target, %', 'goal conversion, %', 'assists per 90', 'crosses per 90', 'accurate crosses, %', 'crosses from left flank per 90', 'accurate crosses from left flank, %', 'crosses from right fla

In [203]:
# Deretter kan du telle på nytt
position_counts = merged_df['main position'].value_counts()
position_counts

main position
CF      572
RCB     325
LCB     311
GK      307
RB      287
LB      270
LCMF    227
AMF     225
RCMF    198
LAMF    163
RAMF    148
RDMF    128
LDMF    123
RW       97
CB       97
LW       89
LWB      84
DMF      83
RWB      78
RWF      65
LWF      57
Name: count, dtype: int64

In [217]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Velg kolonnene vi skal analysere
columns_to_analyze = ['adj_shots per 90', 'xg per 90']

print("=== DESKRIPTIV STATISTIKK ===")
for col in columns_to_analyze:
    if col in df.columns:
        print(f"\n--- {col} ---")
        print(f"Antall verdier: {merged_df[col].count()}")
        print(f"Mangler: {df[col].isna().sum()}")
        print(f"Mean: {df[col].mean():.4f}")
        print(f"Median: {df[col].median():.4f}")
        print(f"Std: {df[col].std():.4f}")
        print(f"Min: {df[col].min():.4f}")
        print(f"Max: {df[col].max():.4f}")
        print(f"Range: {df[col].max() - df[col].min():.4f}")
        print(f"Variasjonskoeffisient: {(df[col].std() / df[col].mean()):.4f}" if df[col].mean() != 0 else "Udefinert")
    else:
        print(f"\n⚠️ Kolonne '{col}' finnes ikke i dataframe")

# Sjekk for ekstreme verdier (outliers)
print("\n=== OUTLIER-ANALYSE (IQR-metode) ===")
for col in columns_to_analyze:
    if col in df.columns:
        Q1 = df[col].quantile(0.25)
        Q3 = df[col].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)][col]
        print(f"{col}: {len(outliers)} outliers ({len(outliers)/len(df)*100:.1f}%)")

=== DESKRIPTIV STATISTIKK ===

--- adj_shots per 90 ---
Antall verdier: 3828
Mangler: 106
Mean: 1.0104
Median: 0.7684
Std: 0.8807
Min: 0.0000
Max: 5.3579
Range: 5.3579
Variasjonskoeffisient: 0.8717

--- xg per 90 ---
Antall verdier: 3828
Mangler: 106
Mean: 0.1263
Median: 0.0700
Std: 0.1506
Min: 0.0000
Max: 1.6700
Range: 1.6700
Variasjonskoeffisient: 1.1924

=== OUTLIER-ANALYSE (IQR-metode) ===
adj_shots per 90: 36 outliers (0.9%)
xg per 90: 200 outliers (5.1%)


In [219]:
merged_df['xg per box touch']=merged_df['xg per 90']/filtered_df['touches in box per 90'] # ekstra metric
merged_df['xg per shot']=merged_df['xg per 90']/filtered_df['shots per 90'] 

In [154]:
from scipy.stats import zscore
import pandas as pd

merged_df = merged_df.copy()

# 1) Definer posisjonsgrupper
position_groups = {
    "GK": ["GK"],
    "Fullback": ["RB", "LB", "RWB", "LWB"],
    "Centre Back": ["LCB", "RCB", "CB"],
    "Defensive Midfielder": ["DMF", "LDMF", "RDMF"],
    "Center Midfield": ["LCMF", "RCMF"],
    "Left Winger": ["LW", "LAMF", "LWF"],
    "Center Att Midfielder": ["AMF"],
    "Right Winger": ["RW", "RAMF", "RWF"],
    "Striker": ["CF"],
}

def map_position(pos):
    for group, positions in position_groups.items():
        if pos in positions:
            return group
    return None

merged_df["position_group"] = merged_df["main position"].map(map_position)

# 2) Kolonner vi IKKE skal z-score
exclude_columns = [
    'player', 'squad', 'club today', 'season', 'league',
    'main position', 'position', 'age', 'market value',
    'contract months left', 'matches played', 'minutes played',
    'birth country', 'passport country', 'foot', 'height',
    'weight', 'on loan', 'position_group', 'poss_decimal',
]

# 3) Kolonner som skal z-scores (alle numeriske som ikke er i exclude)
columns_to_zscore = [
    col for col in merged_df.columns
    if col not in exclude_columns and merged_df[col].dtype in ['int64', 'float64']
]

# 🔹 3b) Ta en kopi av de opprinnelige metric-kolonnene FØR vi begynner å fylle og z-score
metrics_raw = merged_df[columns_to_zscore].copy()

# 4) Fyll NaN innenfor (league, position_group) med gruppesnittet
merged_filled = merged_df.copy()
for col in columns_to_zscore:
    merged_filled[col] = (
        merged_filled
        .groupby(['league', 'position_group'])[col]
        .transform(lambda x: x.fillna(x.mean()))
    )

# 5) Beregn z-score innen (league, position_group) på de utfylte verdiene
z_part = (
    merged_filled
    .groupby(['league', 'position_group'])[columns_to_zscore]
    .transform(zscore, nan_policy='omit')
)

# 6) Prefiks "z_" KUN på z-score-kolonnene
z_part = z_part.add_prefix("z_")

# 7) Sett sammen:
#    - meta (exclude_columns)
#    - originale metrics (metrics_raw)
#    - z-scorede metrics (z_part)
z_scores_df = pd.concat(
    [
        merged_df[exclude_columns],  # meta
        metrics_raw,                 # original metrics
        z_part                       # z-scored metrics
    ],
    axis=1
)

# 8) Kolonnerekkefølge: først meta, så original metrics, så z_
first_cols = exclude_columns
orig_metric_cols = metrics_raw.columns.tolist()
z_metric_cols = z_part.columns.tolist()

z_scores_df = z_scores_df[first_cols + orig_metric_cols + z_metric_cols]


  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)


In [173]:
z_scores_df.head()

Unnamed: 0,player,squad,club today,season,league,main position,position,age,market value,contract months left,matches played,minutes played,birth country,passport country,foot,height,weight,on loan,position_group,poss_decimal,goals,xg,assists,xa,duels per 90,"duels won, %",successful defensive actions per 90,defensive duels per 90,"defensive duels won, %",aerial duels per 90,"aerial duels won, %",sliding tackles per 90,padj sliding tackles,shots blocked per 90,interceptions per 90,padj interceptions,fouls per 90,yellow cards,yellow cards per 90,red cards,red cards per 90,successful attacking actions per 90,goals per 90,non-penalty goals,non-penalty goals per 90,xg per 90,head goals,head goals per 90,shots,shots per 90,"shots on target, %","goal conversion, %",assists per 90,crosses per 90,"accurate crosses, %",crosses from left flank per 90,"accurate crosses from left flank, %",crosses from right flank per 90,"accurate crosses from right flank, %",crosses to goalie box per 90,dribbles per 90,"successful dribbles, %",offensive duels per 90,"offensive duels won, %",touches in box per 90,progressive runs per 90,accelerations per 90,received passes per 90,received long passes per 90,fouls suffered per 90,passes per 90,"accurate passes, %",forward passes per 90,"accurate forward passes, %",back passes per 90,"accurate back passes, %",lateral passes per 90,"accurate lateral passes, %",short / medium passes per 90,"accurate short / medium passes, %",long passes per 90,"accurate long passes, %","average pass length, m","average long pass length, m",xa per 90,shot assists per 90,second assists per 90,third assists per 90,smart passes per 90,"accurate smart passes, %",key passes per 90,passes to final third per 90,"accurate passes to final third, %",passes to penalty area per 90,"accurate passes to penalty area, %",through passes per 90,"accurate through passes, %",deep completions per 90,deep completed crosses per 90,progressive passes per 90,"accurate progressive passes, %",adj_defensive duels per 90,adj_interceptions per 90,adj_fouls per 90,adj_aerial duels per 90,adj_shots per 90,adj_crosses per 90,adj_dribbles per 90,adj_offensive duels per 90,adj_touches in box per 90,adj_progressive runs per 90,adj_accelerations per 90,adj_received passes per 90,adj_received long passes per 90,adj_fouls suffered per 90,adj_passes per 90,adj_forward passes per 90,adj_back passes per 90,adj_passes to final third per 90,adj_progressive passes per 90,adj_lateral passes per 90,adj_short / medium passes per 90,adj_long passes per 90,adj_shot assists per 90,adj_third assists per 90,adj_key passes per 90,adj_passes to penalty area per 90,adj_through passes per 90,adj_deep completions per 90,adj_deep completed crosses per 90,xg per box touch,xg per shot,z_goals,z_xg,z_assists,z_xa,z_duels per 90,"z_duels won, %",z_successful defensive actions per 90,z_defensive duels per 90,"z_defensive duels won, %",z_aerial duels per 90,"z_aerial duels won, %",z_sliding tackles per 90,z_padj sliding tackles,z_shots blocked per 90,z_interceptions per 90,z_padj interceptions,z_fouls per 90,z_yellow cards,z_yellow cards per 90,z_red cards,z_red cards per 90,z_successful attacking actions per 90,z_goals per 90,z_non-penalty goals,z_non-penalty goals per 90,z_xg per 90,z_head goals,z_head goals per 90,z_shots,z_shots per 90,"z_shots on target, %","z_goal conversion, %",z_assists per 90,z_crosses per 90,"z_accurate crosses, %",z_crosses from left flank per 90,"z_accurate crosses from left flank, %",z_crosses from right flank per 90,"z_accurate crosses from right flank, %",z_crosses to goalie box per 90,z_dribbles per 90,"z_successful dribbles, %",z_offensive duels per 90,"z_offensive duels won, %",z_touches in box per 90,z_progressive runs per 90,z_accelerations per 90,z_received passes per 90,z_received long passes per 90,z_fouls suffered per 90,z_passes per 90,"z_accurate passes, %",z_forward passes per 90,"z_accurate forward passes, %",z_back passes per 90,"z_accurate back passes, %",z_lateral passes per 90,"z_accurate lateral passes, %",z_short / medium passes per 90,"z_accurate short / medium passes, %",z_long passes per 90,"z_accurate long passes, %","z_average pass length, m","z_average long pass length, m",z_xa per 90,z_shot assists per 90,z_second assists per 90,z_third assists per 90,z_smart passes per 90,"z_accurate smart passes, %",z_key passes per 90,z_passes to final third per 90,"z_accurate passes to final third, %",z_passes to penalty area per 90,"z_accurate passes to penalty area, %",z_through passes per 90,"z_accurate through passes, %",z_deep completions per 90,z_deep completed crosses per 90,z_progressive passes per 90,"z_accurate progressive passes, %",z_adj_defensive duels per 90,z_adj_interceptions per 90,z_adj_fouls per 90,z_adj_aerial duels per 90,z_adj_shots per 90,z_adj_crosses per 90,z_adj_dribbles per 90,z_adj_offensive duels per 90,z_adj_touches in box per 90,z_adj_progressive runs per 90,z_adj_accelerations per 90,z_adj_received passes per 90,z_adj_received long passes per 90,z_adj_fouls suffered per 90,z_adj_passes per 90,z_adj_forward passes per 90,z_adj_back passes per 90,z_adj_passes to final third per 90,z_adj_progressive passes per 90,z_adj_lateral passes per 90,z_adj_short / medium passes per 90,z_adj_long passes per 90,z_adj_shot assists per 90,z_adj_third assists per 90,z_adj_key passes per 90,z_adj_passes to penalty area per 90,z_adj_through passes per 90,z_adj_deep completions per 90,z_adj_deep completed crosses per 90,z_xg per box touch,z_xg per shot
0,B. van Rooij,Twente,Twente,2024-25,Netherlands Eredivisie 2024-25,RB,RB,24,2000000,31,36,3486,Netherlands,Netherlands,right,174,72,no,Fullback,0.533,1,1.63,6,3.74,14.35,58.45,8.57,6.12,70.46,2.58,49.0,0.13,0.17,0.23,4.13,5.46,0.65,1,0.03,0,0.0,2.35,0.03,1,0.03,0.04,0,0.0,23,0.59,21.74,4.348,0.15,3.18,39.02,0.05,0.0,3.1,39.17,0.62,1.37,69.81,3.61,51.43,0.75,1.81,0.59,33.1,0.96,0.83,47.19,80.8,16.73,72.07,6.92,95.9,17.4,83.53,40.02,86.84,4.1,53.46,19.97,35.45,0.1,0.72,0.05,0.03,0.03,100.0,0.41,5.5,73.24,3.3,46.88,0.28,27.27,0.75,1.21,11.02,70.96,6.552463,4.421842,0.695931,2.762313,0.553471,2.983114,1.285178,3.386492,0.703565,1.697936,0.553471,31.050657,0.900563,0.778612,44.268293,15.694184,6.491557,5.159475,10.337711,16.322702,37.542214,3.846154,0.675422,0.028143,0.384615,3.095685,0.262664,0.703565,1.135084,0.053333,0.067797,0.355703,1.115142,2.561959,1.857223,-0.35465,0.997193,-0.182805,-0.373678,0.761905,0.191659,0.047457,-0.642699,-0.529869,-0.213448,-0.128038,-0.094639,-0.428352,-0.490082,-0.72663,-0.34641,-0.325175,0.554692,-0.16266,0.48572,-0.140695,-0.085775,-0.301511,-0.260439,1.789547,0.147377,-0.209889,-0.191244,1.038231,0.545942,0.495065,-0.847066,-0.897555,1.468602,0.459808,0.838221,-0.130906,0.570144,-0.252982,0.572616,-0.519218,0.267417,1.125695,0.487936,-0.597555,0.096815,0.732297,0.136675,0.797513,0.183716,-0.174335,-0.124695,1.088864,0.146106,0.641328,-0.025586,0.209141,0.38519,0.449867,0.884816,0.213427,0.09769,0.137765,0.333295,-0.396609,2.015811,0.510723,0.469581,0.27931,0.788712,0.007298,0.208182,0.026456,0.328855,0.802816,1.635175,0.323642,-0.245728,0.042319,-0.347717,0.31049,0.070865,0.391696,-0.218013,-0.350994,-0.630581,0.187694,1.145229,0.544725,-0.68048,0.014888,0.80722,0.558264,-0.312503,0.379185,1.336752,1.298755,0.732293,0.038574,0.009424,0.361271,0.456108,0.585443,0.129279,0.349733,0.671501,0.431964,-0.317147
1,N. Olij,Sparta Rotterdam,Sparta Rotterdam,2024-25,Netherlands Eredivisie 2024-25,GK,GK,29,4000000,31,34,3396,Netherlands,Netherlands,right,185,74,no,GK,0.489,0,0.0,1,0.61,0.85,87.5,1.25,0.05,0.0,0.64,95.83,0.0,0.0,0.0,1.25,1.48,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,100.0,0.0,0.0,0.0,22.5,0.16,0.08,32.04,88.83,16.88,80.85,0.03,100.0,13.83,98.28,20.11,98.95,11.87,72.1,31.57,50.29,0.02,0.05,0.0,0.0,0.0,0.0,0.05,3.79,59.44,0.34,15.38,0.37,57.14,0.08,0.0,9.7,83.88,0.048924,1.223092,0.029354,0.626223,0.0,0.0,0.0,0.051125,0.0,0.0,0.0,23.006135,0.163599,0.0818,32.760736,17.259714,0.030675,3.875256,9.9182,14.141104,20.562372,12.137014,0.051125,0.0,0.051125,0.347648,0.378323,0.0818,0.0,,,,-0.164399,2.33513,4.545113,0.380965,0.504937,-0.28663,-0.22607,-0.870065,1.532342,0.662422,-0.202365,-0.196839,,-0.173326,-0.226398,-0.031873,0.504695,-0.126561,,,-0.222718,,,,,,,-0.164399,-0.164399,,,2.33513,,,,,,,,-0.232664,-0.506514,-0.248486,1.301682,-0.164399,-0.233204,-0.164399,0.512204,-0.418668,0.064511,0.766294,0.706596,0.532285,0.614269,0.24916,2.498153,0.649835,0.38899,0.595403,0.675651,0.472728,0.628818,-0.258703,0.225696,5.426047,5.193644,-0.339644,-0.331832,,,3.3214,0.471806,0.332532,0.606311,0.400638,1.028944,1.310038,0.71087,,0.453844,0.939264,-0.23235,-0.232418,-0.025636,1.518864,-0.164399,,-0.239869,-0.239352,-0.164399,-0.235273,-0.164399,0.730502,-0.396442,0.080376,0.96812,0.605472,0.208202,0.43171,0.422266,0.804585,0.819638,0.399734,5.082556,-0.324328,3.38879,0.560145,0.942054,0.677958,,,
2,F. de Keijzer,Heracles,Heracles,2024-25,Netherlands Eredivisie 2024-25,GK,GK,25,500000,19,34,3353,Netherlands,Netherlands,right,193,83,no,GK,0.454,0,0.0,0,0.53,0.67,68.0,1.07,0.16,33.33,0.32,91.67,0.0,0.0,0.0,1.02,1.22,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,50.0,0.0,0.0,0.0,18.2,0.48,0.16,23.7,80.41,15.57,72.24,0.08,100.0,7.52,96.79,10.84,98.27,12.8,65.62,38.78,53.43,0.01,0.03,0.03,0.0,0.0,0.0,0.03,4.7,60.57,0.21,25.0,0.27,40.0,0.05,0.0,10.01,77.75,0.14652,0.934066,0.027473,0.29304,0.0,0.0,0.0,0.055066,0.0,0.0,0.0,20.044053,0.528634,0.176211,26.101322,17.147577,0.088106,5.176211,11.024229,8.281938,11.938326,14.096916,0.03304,0.0,0.03304,0.231278,0.297357,0.055066,0.0,,,,-0.164399,-0.274721,3.9182,0.057665,-0.074857,-0.56591,0.453767,-0.131309,0.130289,0.559281,-0.202365,-0.196839,,-0.530605,-0.585436,-0.031873,0.504695,-0.126561,,,-0.222718,,,,,,,-0.164399,-0.164399,,,-0.274721,,,,,,,,-0.232664,-0.506514,-0.248486,0.162197,-0.164399,-0.233204,-0.164399,-0.144169,0.463655,0.917176,-0.414615,-0.553759,0.117749,-0.396629,1.177404,2.498153,-0.604532,-0.004729,-0.743911,0.364719,0.823256,0.065722,1.009266,1.081163,2.60153,3.02487,2.638776,-0.331832,,,1.862288,1.017825,0.393579,0.075251,0.916478,0.540196,0.682802,0.319376,,0.570447,0.337426,0.427739,-0.704511,-0.038791,0.018282,-0.164399,,-0.239869,-0.229915,-0.164399,-0.235273,-0.164399,0.212171,0.518461,1.069364,-0.045031,0.572565,1.15636,1.139802,0.776887,-0.439721,-0.601263,0.979033,3.203212,-0.324328,2.07499,0.119387,0.581153,0.350103,,,
3,T. Didillon-Hödl,Willem II,Willem II,2024-25,Netherlands Eredivisie 2024-25,GK,GK,29,500000,7,34,3353,France,France,left,193,84,no,GK,0.44,0,0.0,0,0.0,0.56,85.71,1.07,0.08,33.33,0.38,100.0,0.0,0.0,0.0,1.05,1.13,0.05,2,0.05,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,100.0,0.0,0.0,0.0,14.04,0.19,0.03,20.43,82.26,12.96,73.29,0.0,0.0,6.68,98.8,10.09,97.07,10.25,68.32,35.18,49.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.99,55.41,0.03,0.0,0.08,33.33,0.0,0.0,8.88,73.11,0.071429,0.9375,0.044643,0.339286,0.0,0.0,0.0,0.034091,0.0,0.0,0.0,15.954545,0.215909,0.034091,23.215909,14.727273,0.0,2.261364,10.090909,7.590909,11.465909,11.647727,0.0,0.0,0.0,0.034091,0.090909,0.0,0.0,,,,-0.164399,-0.274721,-0.235092,-0.139908,0.451715,-0.56591,-0.04066,-0.131309,0.393174,0.765811,-0.202365,-0.196839,,-0.484004,-0.709719,0.095619,1.570162,-0.009261,,,-0.222718,,,,,,,-0.164399,-0.164399,,,-0.274721,,,,,,,,-0.232664,-0.506514,-0.296785,1.301682,-0.164399,-0.233204,-0.164399,-0.779172,-0.33595,-0.468405,-0.877633,-0.27684,-0.708159,-0.273348,-0.307786,-0.422764,-0.771516,0.526395,-0.85227,-0.183984,-0.137871,0.300345,0.376161,-0.008603,-0.222988,-0.228292,-0.339644,-0.331832,,,-0.32638,-0.608232,0.114815,-0.660062,-0.424062,-0.388426,0.438714,-0.333113,,0.145408,-0.118125,-0.080139,-0.698902,0.081248,0.226561,-0.164399,,-0.239869,-0.28014,-0.164399,-0.235273,-0.164399,-0.503448,-0.265336,-0.419384,-0.484013,-0.13769,-0.298226,-0.44671,0.47764,-0.586474,-0.679098,0.255113,-0.23016,-0.324328,-0.32519,-0.627463,-0.339073,-0.325217,,,
4,M. Deijl,Go Ahead Eagles,Go Ahead Eagles,2024-25,Netherlands Eredivisie 2024-25,RB,RB,27,1200000,19,34,3337,Netherlands,Netherlands,right,181,72,no,Fullback,0.514,4,4.81,2,2.24,13.03,55.49,6.47,5.56,57.77,2.62,48.45,0.16,0.21,0.35,3.1,3.93,1.0,2,0.05,0,0.0,1.24,0.11,2,0.05,0.13,0,0.0,31,0.84,41.94,12.903,0.05,1.7,22.22,0.03,100.0,1.65,21.31,0.35,0.67,72.0,2.72,59.41,1.51,0.76,0.11,26.89,1.62,1.73,35.06,76.92,14.51,64.31,6.26,96.55,10.38,83.64,28.16,86.88,5.31,40.61,21.54,39.7,0.06,0.3,0.0,0.0,0.05,50.0,0.16,6.26,60.78,1.83,32.35,0.35,38.46,0.46,0.43,6.88,63.92,5.720165,3.1893,1.028807,2.695473,0.817121,1.653696,0.651751,2.645914,1.468872,0.7393,0.107004,26.157588,1.575875,1.682879,34.105058,14.114786,6.089494,6.089494,6.692607,10.097276,27.392996,5.16537,0.291829,0.0,0.155642,1.780156,0.340467,0.447471,0.418288,0.086093,0.154762,3.460024,4.83407,0.40452,0.735081,-0.772672,0.44476,-1.243985,-0.660853,-0.576906,0.227899,0.010999,-0.553088,-0.446854,0.294363,-1.016344,-0.758788,0.507383,-0.042007,-0.612031,-0.34641,-0.325175,-0.692624,0.637452,1.681338,0.060931,2.255882,-0.301511,-0.260439,2.741988,0.835841,0.666811,0.475706,-0.196789,-0.603464,-0.653983,-0.860371,3.052072,0.386142,-0.203422,-0.010012,-0.903722,0.669932,-0.704878,1.197021,0.325702,-0.913892,-0.653261,-0.181281,0.271011,1.728319,-0.441886,-0.389003,0.201392,-0.550567,-0.412954,0.071938,-0.469074,0.157688,-0.526953,-0.018235,0.928556,-0.517803,1.041112,1.367514,-0.349525,-0.862137,-0.393448,-0.444394,-0.179844,0.673929,-0.615144,0.843775,-0.517155,-0.561333,-0.785623,0.537244,0.40642,-0.277184,-0.736872,-0.29307,-0.221583,-0.684438,-1.088755,0.480731,0.248562,0.80946,-0.606326,-0.926954,-0.732809,0.434957,-0.965071,-0.701081,-0.229025,0.221833,1.71271,-0.633356,0.132465,-0.502709,0.883446,-0.353666,-0.593151,-0.725723,0.771628,-0.867259,-0.462582,-0.640741,-0.565231,0.501434,-0.292594,-0.750071,1.465734,1.542061


In [156]:
output_path=r"C:\Users\henri\Desktop\Fotballanalyser\Lamine Yamal\Update\Output\wyscout-big8-preprocessed-06-11-25.csv"
z_scores_df.to_csv(output_path)

In [157]:
col_list = z_scores_df.columns.to_list()
print(col_list) #print the columns 

['player', 'squad', 'club today', 'season', 'league', 'main position', 'position', 'age', 'market value', 'contract months left', 'matches played', 'minutes played', 'birth country', 'passport country', 'foot', 'height', 'weight', 'on loan', 'position_group', 'poss_decimal', 'goals', 'xg', 'assists', 'xa', 'duels per 90', 'duels won, %', 'successful defensive actions per 90', 'defensive duels per 90', 'defensive duels won, %', 'aerial duels per 90', 'aerial duels won, %', 'sliding tackles per 90', 'padj sliding tackles', 'shots blocked per 90', 'interceptions per 90', 'padj interceptions', 'fouls per 90', 'yellow cards', 'yellow cards per 90', 'red cards', 'red cards per 90', 'successful attacking actions per 90', 'goals per 90', 'non-penalty goals', 'non-penalty goals per 90', 'xg per 90', 'head goals', 'head goals per 90', 'shots', 'shots per 90', 'shots on target, %', 'goal conversion, %', 'assists per 90', 'crosses per 90', 'accurate crosses, %', 'crosses from left flank per 90', '

In [158]:
z_scores_df.head()

Unnamed: 0,player,squad,club today,season,league,main position,position,age,market value,contract months left,matches played,minutes played,birth country,passport country,foot,height,weight,on loan,position_group,poss_decimal,goals,xg,assists,xa,duels per 90,"duels won, %",successful defensive actions per 90,defensive duels per 90,"defensive duels won, %",aerial duels per 90,"aerial duels won, %",sliding tackles per 90,padj sliding tackles,shots blocked per 90,interceptions per 90,padj interceptions,fouls per 90,yellow cards,yellow cards per 90,red cards,red cards per 90,successful attacking actions per 90,goals per 90,non-penalty goals,non-penalty goals per 90,xg per 90,head goals,head goals per 90,shots,shots per 90,"shots on target, %","goal conversion, %",assists per 90,crosses per 90,"accurate crosses, %",crosses from left flank per 90,"accurate crosses from left flank, %",crosses from right flank per 90,"accurate crosses from right flank, %",crosses to goalie box per 90,dribbles per 90,"successful dribbles, %",offensive duels per 90,"offensive duels won, %",touches in box per 90,progressive runs per 90,accelerations per 90,received passes per 90,received long passes per 90,fouls suffered per 90,passes per 90,"accurate passes, %",forward passes per 90,"accurate forward passes, %",back passes per 90,"accurate back passes, %",lateral passes per 90,"accurate lateral passes, %",short / medium passes per 90,"accurate short / medium passes, %",long passes per 90,"accurate long passes, %","average pass length, m","average long pass length, m",xa per 90,shot assists per 90,second assists per 90,third assists per 90,smart passes per 90,"accurate smart passes, %",key passes per 90,passes to final third per 90,"accurate passes to final third, %",passes to penalty area per 90,"accurate passes to penalty area, %",through passes per 90,"accurate through passes, %",deep completions per 90,deep completed crosses per 90,progressive passes per 90,"accurate progressive passes, %",adj_defensive duels per 90,adj_interceptions per 90,adj_fouls per 90,adj_aerial duels per 90,adj_shots per 90,adj_crosses per 90,adj_dribbles per 90,adj_offensive duels per 90,adj_touches in box per 90,adj_progressive runs per 90,adj_accelerations per 90,adj_received passes per 90,adj_received long passes per 90,adj_fouls suffered per 90,adj_passes per 90,adj_forward passes per 90,adj_back passes per 90,adj_passes to final third per 90,adj_progressive passes per 90,adj_lateral passes per 90,adj_short / medium passes per 90,adj_long passes per 90,adj_shot assists per 90,adj_third assists per 90,adj_key passes per 90,adj_passes to penalty area per 90,adj_through passes per 90,adj_deep completions per 90,adj_deep completed crosses per 90,xg per box touch,xg per shot,z_goals,z_xg,z_assists,z_xa,z_duels per 90,"z_duels won, %",z_successful defensive actions per 90,z_defensive duels per 90,"z_defensive duels won, %",z_aerial duels per 90,"z_aerial duels won, %",z_sliding tackles per 90,z_padj sliding tackles,z_shots blocked per 90,z_interceptions per 90,z_padj interceptions,z_fouls per 90,z_yellow cards,z_yellow cards per 90,z_red cards,z_red cards per 90,z_successful attacking actions per 90,z_goals per 90,z_non-penalty goals,z_non-penalty goals per 90,z_xg per 90,z_head goals,z_head goals per 90,z_shots,z_shots per 90,"z_shots on target, %","z_goal conversion, %",z_assists per 90,z_crosses per 90,"z_accurate crosses, %",z_crosses from left flank per 90,"z_accurate crosses from left flank, %",z_crosses from right flank per 90,"z_accurate crosses from right flank, %",z_crosses to goalie box per 90,z_dribbles per 90,"z_successful dribbles, %",z_offensive duels per 90,"z_offensive duels won, %",z_touches in box per 90,z_progressive runs per 90,z_accelerations per 90,z_received passes per 90,z_received long passes per 90,z_fouls suffered per 90,z_passes per 90,"z_accurate passes, %",z_forward passes per 90,"z_accurate forward passes, %",z_back passes per 90,"z_accurate back passes, %",z_lateral passes per 90,"z_accurate lateral passes, %",z_short / medium passes per 90,"z_accurate short / medium passes, %",z_long passes per 90,"z_accurate long passes, %","z_average pass length, m","z_average long pass length, m",z_xa per 90,z_shot assists per 90,z_second assists per 90,z_third assists per 90,z_smart passes per 90,"z_accurate smart passes, %",z_key passes per 90,z_passes to final third per 90,"z_accurate passes to final third, %",z_passes to penalty area per 90,"z_accurate passes to penalty area, %",z_through passes per 90,"z_accurate through passes, %",z_deep completions per 90,z_deep completed crosses per 90,z_progressive passes per 90,"z_accurate progressive passes, %",z_adj_defensive duels per 90,z_adj_interceptions per 90,z_adj_fouls per 90,z_adj_aerial duels per 90,z_adj_shots per 90,z_adj_crosses per 90,z_adj_dribbles per 90,z_adj_offensive duels per 90,z_adj_touches in box per 90,z_adj_progressive runs per 90,z_adj_accelerations per 90,z_adj_received passes per 90,z_adj_received long passes per 90,z_adj_fouls suffered per 90,z_adj_passes per 90,z_adj_forward passes per 90,z_adj_back passes per 90,z_adj_passes to final third per 90,z_adj_progressive passes per 90,z_adj_lateral passes per 90,z_adj_short / medium passes per 90,z_adj_long passes per 90,z_adj_shot assists per 90,z_adj_third assists per 90,z_adj_key passes per 90,z_adj_passes to penalty area per 90,z_adj_through passes per 90,z_adj_deep completions per 90,z_adj_deep completed crosses per 90,z_xg per box touch,z_xg per shot
0,B. van Rooij,Twente,Twente,2024-25,Netherlands Eredivisie 2024-25,RB,RB,24,2000000,31,36,3486,Netherlands,Netherlands,right,174,72,no,Fullback,0.533,1,1.63,6,3.74,14.35,58.45,8.57,6.12,70.46,2.58,49.0,0.13,0.17,0.23,4.13,5.46,0.65,1,0.03,0,0.0,2.35,0.03,1,0.03,0.04,0,0.0,23,0.59,21.74,4.348,0.15,3.18,39.02,0.05,0.0,3.1,39.17,0.62,1.37,69.81,3.61,51.43,0.75,1.81,0.59,33.1,0.96,0.83,47.19,80.8,16.73,72.07,6.92,95.9,17.4,83.53,40.02,86.84,4.1,53.46,19.97,35.45,0.1,0.72,0.05,0.03,0.03,100.0,0.41,5.5,73.24,3.3,46.88,0.28,27.27,0.75,1.21,11.02,70.96,6.552463,4.421842,0.695931,2.762313,0.553471,2.983114,1.285178,3.386492,0.703565,1.697936,0.553471,31.050657,0.900563,0.778612,44.268293,15.694184,6.491557,5.159475,10.337711,16.322702,37.542214,3.846154,0.675422,0.028143,0.384615,3.095685,0.262664,0.703565,1.135084,0.053333,0.067797,0.355703,1.115142,2.561959,1.857223,-0.35465,0.997193,-0.182805,-0.373678,0.761905,0.191659,0.047457,-0.642699,-0.529869,-0.213448,-0.128038,-0.094639,-0.428352,-0.490082,-0.72663,-0.34641,-0.325175,0.554692,-0.16266,0.48572,-0.140695,-0.085775,-0.301511,-0.260439,1.789547,0.147377,-0.209889,-0.191244,1.038231,0.545942,0.495065,-0.847066,-0.897555,1.468602,0.459808,0.838221,-0.130906,0.570144,-0.252982,0.572616,-0.519218,0.267417,1.125695,0.487936,-0.597555,0.096815,0.732297,0.136675,0.797513,0.183716,-0.174335,-0.124695,1.088864,0.146106,0.641328,-0.025586,0.209141,0.38519,0.449867,0.884816,0.213427,0.09769,0.137765,0.333295,-0.396609,2.015811,0.510723,0.469581,0.27931,0.788712,0.007298,0.208182,0.026456,0.328855,0.802816,1.635175,0.323642,-0.245728,0.042319,-0.347717,0.31049,0.070865,0.391696,-0.218013,-0.350994,-0.630581,0.187694,1.145229,0.544725,-0.68048,0.014888,0.80722,0.558264,-0.312503,0.379185,1.336752,1.298755,0.732293,0.038574,0.009424,0.361271,0.456108,0.585443,0.129279,0.349733,0.671501,0.431964,-0.317147
1,N. Olij,Sparta Rotterdam,Sparta Rotterdam,2024-25,Netherlands Eredivisie 2024-25,GK,GK,29,4000000,31,34,3396,Netherlands,Netherlands,right,185,74,no,GK,0.489,0,0.0,1,0.61,0.85,87.5,1.25,0.05,0.0,0.64,95.83,0.0,0.0,0.0,1.25,1.48,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,100.0,0.0,0.0,0.0,22.5,0.16,0.08,32.04,88.83,16.88,80.85,0.03,100.0,13.83,98.28,20.11,98.95,11.87,72.1,31.57,50.29,0.02,0.05,0.0,0.0,0.0,0.0,0.05,3.79,59.44,0.34,15.38,0.37,57.14,0.08,0.0,9.7,83.88,0.048924,1.223092,0.029354,0.626223,0.0,0.0,0.0,0.051125,0.0,0.0,0.0,23.006135,0.163599,0.0818,32.760736,17.259714,0.030675,3.875256,9.9182,14.141104,20.562372,12.137014,0.051125,0.0,0.051125,0.347648,0.378323,0.0818,0.0,,,,-0.164399,2.33513,4.545113,0.380965,0.504937,-0.28663,-0.22607,-0.870065,1.532342,0.662422,-0.202365,-0.196839,,-0.173326,-0.226398,-0.031873,0.504695,-0.126561,,,-0.222718,,,,,,,-0.164399,-0.164399,,,2.33513,,,,,,,,-0.232664,-0.506514,-0.248486,1.301682,-0.164399,-0.233204,-0.164399,0.512204,-0.418668,0.064511,0.766294,0.706596,0.532285,0.614269,0.24916,2.498153,0.649835,0.38899,0.595403,0.675651,0.472728,0.628818,-0.258703,0.225696,5.426047,5.193644,-0.339644,-0.331832,,,3.3214,0.471806,0.332532,0.606311,0.400638,1.028944,1.310038,0.71087,,0.453844,0.939264,-0.23235,-0.232418,-0.025636,1.518864,-0.164399,,-0.239869,-0.239352,-0.164399,-0.235273,-0.164399,0.730502,-0.396442,0.080376,0.96812,0.605472,0.208202,0.43171,0.422266,0.804585,0.819638,0.399734,5.082556,-0.324328,3.38879,0.560145,0.942054,0.677958,,,
2,F. de Keijzer,Heracles,Heracles,2024-25,Netherlands Eredivisie 2024-25,GK,GK,25,500000,19,34,3353,Netherlands,Netherlands,right,193,83,no,GK,0.454,0,0.0,0,0.53,0.67,68.0,1.07,0.16,33.33,0.32,91.67,0.0,0.0,0.0,1.02,1.22,0.03,1,0.03,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,50.0,0.0,0.0,0.0,18.2,0.48,0.16,23.7,80.41,15.57,72.24,0.08,100.0,7.52,96.79,10.84,98.27,12.8,65.62,38.78,53.43,0.01,0.03,0.03,0.0,0.0,0.0,0.03,4.7,60.57,0.21,25.0,0.27,40.0,0.05,0.0,10.01,77.75,0.14652,0.934066,0.027473,0.29304,0.0,0.0,0.0,0.055066,0.0,0.0,0.0,20.044053,0.528634,0.176211,26.101322,17.147577,0.088106,5.176211,11.024229,8.281938,11.938326,14.096916,0.03304,0.0,0.03304,0.231278,0.297357,0.055066,0.0,,,,-0.164399,-0.274721,3.9182,0.057665,-0.074857,-0.56591,0.453767,-0.131309,0.130289,0.559281,-0.202365,-0.196839,,-0.530605,-0.585436,-0.031873,0.504695,-0.126561,,,-0.222718,,,,,,,-0.164399,-0.164399,,,-0.274721,,,,,,,,-0.232664,-0.506514,-0.248486,0.162197,-0.164399,-0.233204,-0.164399,-0.144169,0.463655,0.917176,-0.414615,-0.553759,0.117749,-0.396629,1.177404,2.498153,-0.604532,-0.004729,-0.743911,0.364719,0.823256,0.065722,1.009266,1.081163,2.60153,3.02487,2.638776,-0.331832,,,1.862288,1.017825,0.393579,0.075251,0.916478,0.540196,0.682802,0.319376,,0.570447,0.337426,0.427739,-0.704511,-0.038791,0.018282,-0.164399,,-0.239869,-0.229915,-0.164399,-0.235273,-0.164399,0.212171,0.518461,1.069364,-0.045031,0.572565,1.15636,1.139802,0.776887,-0.439721,-0.601263,0.979033,3.203212,-0.324328,2.07499,0.119387,0.581153,0.350103,,,
3,T. Didillon-Hödl,Willem II,Willem II,2024-25,Netherlands Eredivisie 2024-25,GK,GK,29,500000,7,34,3353,France,France,left,193,84,no,GK,0.44,0,0.0,0,0.0,0.56,85.71,1.07,0.08,33.33,0.38,100.0,0.0,0.0,0.0,1.05,1.13,0.05,2,0.05,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,100.0,0.0,0.0,0.0,14.04,0.19,0.03,20.43,82.26,12.96,73.29,0.0,0.0,6.68,98.8,10.09,97.07,10.25,68.32,35.18,49.43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.99,55.41,0.03,0.0,0.08,33.33,0.0,0.0,8.88,73.11,0.071429,0.9375,0.044643,0.339286,0.0,0.0,0.0,0.034091,0.0,0.0,0.0,15.954545,0.215909,0.034091,23.215909,14.727273,0.0,2.261364,10.090909,7.590909,11.465909,11.647727,0.0,0.0,0.0,0.034091,0.090909,0.0,0.0,,,,-0.164399,-0.274721,-0.235092,-0.139908,0.451715,-0.56591,-0.04066,-0.131309,0.393174,0.765811,-0.202365,-0.196839,,-0.484004,-0.709719,0.095619,1.570162,-0.009261,,,-0.222718,,,,,,,-0.164399,-0.164399,,,-0.274721,,,,,,,,-0.232664,-0.506514,-0.296785,1.301682,-0.164399,-0.233204,-0.164399,-0.779172,-0.33595,-0.468405,-0.877633,-0.27684,-0.708159,-0.273348,-0.307786,-0.422764,-0.771516,0.526395,-0.85227,-0.183984,-0.137871,0.300345,0.376161,-0.008603,-0.222988,-0.228292,-0.339644,-0.331832,,,-0.32638,-0.608232,0.114815,-0.660062,-0.424062,-0.388426,0.438714,-0.333113,,0.145408,-0.118125,-0.080139,-0.698902,0.081248,0.226561,-0.164399,,-0.239869,-0.28014,-0.164399,-0.235273,-0.164399,-0.503448,-0.265336,-0.419384,-0.484013,-0.13769,-0.298226,-0.44671,0.47764,-0.586474,-0.679098,0.255113,-0.23016,-0.324328,-0.32519,-0.627463,-0.339073,-0.325217,,,
4,M. Deijl,Go Ahead Eagles,Go Ahead Eagles,2024-25,Netherlands Eredivisie 2024-25,RB,RB,27,1200000,19,34,3337,Netherlands,Netherlands,right,181,72,no,Fullback,0.514,4,4.81,2,2.24,13.03,55.49,6.47,5.56,57.77,2.62,48.45,0.16,0.21,0.35,3.1,3.93,1.0,2,0.05,0,0.0,1.24,0.11,2,0.05,0.13,0,0.0,31,0.84,41.94,12.903,0.05,1.7,22.22,0.03,100.0,1.65,21.31,0.35,0.67,72.0,2.72,59.41,1.51,0.76,0.11,26.89,1.62,1.73,35.06,76.92,14.51,64.31,6.26,96.55,10.38,83.64,28.16,86.88,5.31,40.61,21.54,39.7,0.06,0.3,0.0,0.0,0.05,50.0,0.16,6.26,60.78,1.83,32.35,0.35,38.46,0.46,0.43,6.88,63.92,5.720165,3.1893,1.028807,2.695473,0.817121,1.653696,0.651751,2.645914,1.468872,0.7393,0.107004,26.157588,1.575875,1.682879,34.105058,14.114786,6.089494,6.089494,6.692607,10.097276,27.392996,5.16537,0.291829,0.0,0.155642,1.780156,0.340467,0.447471,0.418288,0.086093,0.154762,3.460024,4.83407,0.40452,0.735081,-0.772672,0.44476,-1.243985,-0.660853,-0.576906,0.227899,0.010999,-0.553088,-0.446854,0.294363,-1.016344,-0.758788,0.507383,-0.042007,-0.612031,-0.34641,-0.325175,-0.692624,0.637452,1.681338,0.060931,2.255882,-0.301511,-0.260439,2.741988,0.835841,0.666811,0.475706,-0.196789,-0.603464,-0.653983,-0.860371,3.052072,0.386142,-0.203422,-0.010012,-0.903722,0.669932,-0.704878,1.197021,0.325702,-0.913892,-0.653261,-0.181281,0.271011,1.728319,-0.441886,-0.389003,0.201392,-0.550567,-0.412954,0.071938,-0.469074,0.157688,-0.526953,-0.018235,0.928556,-0.517803,1.041112,1.367514,-0.349525,-0.862137,-0.393448,-0.444394,-0.179844,0.673929,-0.615144,0.843775,-0.517155,-0.561333,-0.785623,0.537244,0.40642,-0.277184,-0.736872,-0.29307,-0.221583,-0.684438,-1.088755,0.480731,0.248562,0.80946,-0.606326,-0.926954,-0.732809,0.434957,-0.965071,-0.701081,-0.229025,0.221833,1.71271,-0.633356,0.132465,-0.502709,0.883446,-0.353666,-0.593151,-0.725723,0.771628,-0.867259,-0.462582,-0.640741,-0.565231,0.501434,-0.292594,-0.750071,1.465734,1.542061
