#UEFA EURO 2024 Players Dataset

In [28]:
#link to the Kaggle dataset -> https://www.kaggle.com/datasets/damirdizdarevic/uefa-euro-2024-players

import pandas as pd
import numpy as np

##Data Exploration

In [29]:
df_euro_2024_players = pd.read_csv("/content/euro2024_players.csv")

In [30]:
df_euro_2024_players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 623 entries, 0 to 622
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Name         623 non-null    object
 1   Position     623 non-null    object
 2   Age          623 non-null    int64 
 3   Club         623 non-null    object
 4   Height       623 non-null    int64 
 5   Foot         620 non-null    object
 6   Caps         623 non-null    int64 
 7   Goals        623 non-null    int64 
 8   MarketValue  623 non-null    int64 
 9   Country      623 non-null    object
dtypes: int64(5), object(5)
memory usage: 48.8+ KB


In [31]:
'''
About Dataset
Dataset of all the players that are in the squad of the teams participating
in the UEFA EURO 2024. Contains info about clubs, age, height, market value etc.
which can be very good for EDA and Data Visualizations.
'''

'\nAbout Dataset\nDataset of all the players that are in the squad of the teams participating\nin the UEFA EURO 2024. Contains info about clubs, age, height, market value etc.\nwhich can be very good for EDA and Data Visualizations.\n'

In [32]:
df_euro_2024_players.describe(include='all')

Unnamed: 0,Name,Position,Age,Club,Height,Foot,Caps,Goals,MarketValue,Country
count,623,623,623.0,623,623.0,620,623.0,623.0,623.0,623
unique,623,13,,219,,4,,,,24
top,Tomas Chory,Centre-Back,,Inter Milan,,right,,,,Poland
freq,1,125,,13,,436,,,,27
mean,,,27.040128,,184.18138,,30.338684,4.152488,18409030.0,
std,,,4.124275,,6.569258,,30.987902,10.086803,24261950.0,
min,,,16.0,,167.0,,0.0,0.0,50000.0,
25%,,,24.0,,180.0,,7.0,0.0,2900000.0,
50%,,,27.0,,185.0,,21.0,1.0,9000000.0,
75%,,,30.0,,189.0,,42.0,4.0,25000000.0,


In [33]:
columns_euro_2024_players = [
    'Name', 'Position', 'Age', 'Club',
    'Height', 'Foot', 'Caps', 'Goals',
    'MarketValue', 'Country'
]

for col in columns_euro_2024_players:
    missing = df_euro_2024_players[col].isna().sum()
    duplicated = df_euro_2024_players[col].duplicated().sum()
    total_non_null = df_euro_2024_players[col].count()

    print(f"--- {col} ---")
    print(f"Missing values:        {missing}")
    print(f"Duplicated values:     {duplicated}")
    print(f"Total non-null values: {total_non_null}\n")

--- Name ---
Missing values:        0
Duplicated values:     0
Total non-null values: 623

--- Position ---
Missing values:        0
Duplicated values:     610
Total non-null values: 623

--- Age ---
Missing values:        0
Duplicated values:     599
Total non-null values: 623

--- Club ---
Missing values:        0
Duplicated values:     404
Total non-null values: 623

--- Height ---
Missing values:        0
Duplicated values:     589
Total non-null values: 623

--- Foot ---
Missing values:        3
Duplicated values:     618
Total non-null values: 620

--- Caps ---
Missing values:        0
Duplicated values:     510
Total non-null values: 623

--- Goals ---
Missing values:        0
Duplicated values:     584
Total non-null values: 623

--- MarketValue ---
Missing values:        0
Duplicated values:     540
Total non-null values: 623

--- Country ---
Missing values:        0
Duplicated values:     599
Total non-null values: 623



##Data Cleaning

In [34]:
#Position column

#Missing values:        0
#Duplicated values:     610
#Total non-null values: 623

df_euro_2024_players['Position'].value_counts()

Unnamed: 0_level_0,count
Position,Unnamed: 1_level_1
Centre-Back,125
Central Midfield,81
Centre-Forward,77
Goalkeeper,72
Defensive Midfield,50
Attacking Midfield,48
Left Winger,44
Right-Back,43
Right Winger,35
Left-Back,33


In [35]:
#Age column

#Missing values:        0
#Duplicated values:     599
#Total non-null values: 623

#range check -> (16, 41)
df_euro_2024_players['Age'].min(), df_euro_2024_players['Age'].max()

df_euro_2024_players['Age'].value_counts()

Unnamed: 0_level_0,count
Age,Unnamed: 1_level_1
25,62
27,59
26,58
28,56
24,52
29,44
23,42
31,36
30,36
32,35


In [36]:
#Club column

#Missing values:        0
#Duplicated values:     404
#Total non-null values: 623

df_euro_2024_players['Club'].value_counts()

Unnamed: 0_level_0,count
Club,Unnamed: 1_level_1
Inter Milan,13
Manchester City,13
FC Barcelona,12
Real Madrid,12
Paris Saint-Germain,12
...,...
Cracovia,1
Karlsruher SC,1
FC Metz,1
Dinamo Batumi,1


In [37]:
#Height column

#Missing values:        0
#Duplicated values:     589
#Total non-null values: 623

#range check -> (167, 202)
df_euro_2024_players['Height'].min(), df_euro_2024_players['Height'].max()

df_euro_2024_players['Height'].value_counts()

Unnamed: 0_level_0,count
Height,Unnamed: 1_level_1
188,43
178,42
185,39
190,38
180,38
183,32
186,32
182,32
189,31
187,31


In [38]:
#Foot column

#Missing values:        3
#Duplicated values:     618
#Total non-null values: 620

df_euro_2024_players['Foot'].value_counts()

Unnamed: 0_level_0,count
Foot,Unnamed: 1_level_1
right,436
left,150
both,31
-,3


In [39]:
#Caps column

#Missing values:        0
#Duplicated values:     510
#Total non-null values: 623

df_euro_2024_players['Caps'].value_counts()

Unnamed: 0_level_0,count
Caps,Unnamed: 1_level_1
1,37
3,27
2,22
6,17
4,16
...,...
85,1
112,1
108,1
136,1


In [40]:
#Goals column

#Missing values:        0
#Duplicated values:     584
#Total non-null values: 623

df_euro_2024_players['Goals'].value_counts()

Unnamed: 0_level_0,count
Goals,Unnamed: 1_level_1
0,253
1,94
2,70
3,44
4,23
8,19
5,16
6,14
11,12
7,12


In [41]:
#MarketValue column

#Missing values:        0
#Duplicated values:     540
#Total non-null values: 623

#range check -> (50.000, 180.000.000)
df_euro_2024_players['MarketValue'].min(), df_euro_2024_players['MarketValue'].max()

df_euro_2024_players['MarketValue'].value_counts()

Unnamed: 0_level_0,count
MarketValue,Unnamed: 1_level_1
5000000,28
2500000,26
3000000,26
10000000,25
30000000,25
...,...
50000,1
1100000,1
250000,1
750000,1


In [42]:
#Country column

#Missing values:        0
#Duplicated values:     599
#Total non-null values: 623

df_euro_2024_players['Country'].value_counts()

Unnamed: 0_level_0,count
Country,Unnamed: 1_level_1
Poland,27
Germany,26
Hungary,26
Scotland,26
Spain,26
Croatia,26
Albania,26
Switzerland,26
Italy,26
Slovenia,26
