# Soccer Striker’s Data Analysis

In [1]:
# Import necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the strikers dataset from its CSV file and preview the first rows
csv_path = "../DATA/soccer_strikers.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Player Name,Chances Created,Assists,Goals,Club,Manager,Annual Salary
0,Cristiano Ronaldo,85,20,35,Manchester United,Ole Gunnar Solskjær,"$35,000,000"
1,Lionel Messi,90,25,30,Paris Saint-Germain,Mauricio Pochettino,"$40,000,000"
2,Robert Lewandowski,70,15,40,Bayern Munich,Julian Nagelsmann,"$25,000,000"
3,Erling Haaland,60,10,35,Borussia Dortmund,Marco Rose,"$20,000,000"
4,Kylian Mbappé,80,18,25,Paris Saint-Germain,Mauricio Pochettino,"$30,000,000"


In [3]:
# Summarise the dataset: column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Player Name      10 non-null     object
 1   Chances Created  10 non-null     int64 
 2   Assists          10 non-null     int64 
 3   Goals            10 non-null     int64 
 4   Club             10 non-null     object
 5   Manager          10 non-null     object
 6   Annual Salary    10 non-null     object
dtypes: int64(3), object(4)
memory usage: 692.0+ bytes


In [4]:
# Count the missing values in each column
df.isna().sum()

Player Name        0
Chances Created    0
Assists            0
Goals              0
Club               0
Manager            0
Annual Salary      0
dtype: int64

In [5]:
# Count duplicate rows in the dataset
df.duplicated().sum()

0

In [6]:
# Work on a separate deep copy so the originally loaded DataFrame stays untouched
df_copy = df.copy(deep=True)

# Convert Annual Salary dtype to int

# Function to convert annual salary
def convert_to_int(df):
    """Convert the "Annual Salary" column from currency text to integers.

    Strips the "$" sign and the "," thousands separators (for example
    "$35,000,000" becomes 35000000) and casts the column to int. The frame
    is modified in place and is also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an "Annual Salary" column.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with "Annual Salary" as an integer column.
    """
    salary = df['Annual Salary']

    # Already converted (e.g. the cell was re-run): the .str accessor would
    # raise on a non-string column, so leave the frame as it is.
    if pd.api.types.is_integer_dtype(salary):
        return df

    # regex=False makes both patterns literal. "$" is a regex end-of-string
    # anchor, so relying on the pandas version's default for `regex` is fragile.
    cleaned = (
        salary.str.replace("$", "", regex=False)
              .str.replace(",", "", regex=False)
    )
    df['Annual Salary'] = cleaned.astype(int)
    return df

# Run the salary conversion on the working copy (the function returns the same frame it updates)
df_copy = convert_to_int(df_copy)
df_copy.head()

Unnamed: 0,Player Name,Chances Created,Assists,Goals,Club,Manager,Annual Salary
0,Cristiano Ronaldo,85,20,35,Manchester United,Ole Gunnar Solskjær,35000000
1,Lionel Messi,90,25,30,Paris Saint-Germain,Mauricio Pochettino,40000000
2,Robert Lewandowski,70,15,40,Bayern Munich,Julian Nagelsmann,25000000
3,Erling Haaland,60,10,35,Borussia Dortmund,Marco Rose,20000000
4,Kylian Mbappé,80,18,25,Paris Saint-Germain,Mauricio Pochettino,30000000


In [7]:
# Add a "Conversion Rate" column: goals expressed as a percentage of chances created
goals_per_chance = df_copy["Goals"].div(df_copy["Chances Created"])
df_copy["Conversion Rate"] = goals_per_chance.mul(100)
df_copy.head()

Unnamed: 0,Player Name,Chances Created,Assists,Goals,Club,Manager,Annual Salary,Conversion Rate
0,Cristiano Ronaldo,85,20,35,Manchester United,Ole Gunnar Solskjær,35000000,41.176471
1,Lionel Messi,90,25,30,Paris Saint-Germain,Mauricio Pochettino,40000000,33.333333
2,Robert Lewandowski,70,15,40,Bayern Munich,Julian Nagelsmann,25000000,57.142857
3,Erling Haaland,60,10,35,Borussia Dortmund,Marco Rose,20000000,58.333333
4,Kylian Mbappé,80,18,25,Paris Saint-Germain,Mauricio Pochettino,30000000,31.25


In [8]:
# Player with Highest Conversion Rate

# Order the rows from the highest conversion rate to the lowest
df_sorted = df_copy.sort_values("Conversion Rate", ascending=False)

# One-row slices holding the top player's name and conversion rate
top_row = df_sorted.head(1)
player_highestCR = top_row["Player Name"]
player_CR = top_row["Conversion Rate"]

print(
    f"Player with highest conversion rate is {player_highestCR.iloc[0]} "
    f"with conversion rate of {player_CR.iloc[0]:.2f}%"
)

Player with highest conversion rate is Erling Haaland with conversion rate of 58.33%


In [9]:
# Create a hierarchical index

# Guarded so the cell can be re-run: once "Player Name" and "Club" form the
# index they are no longer columns, and calling set_index again would raise
# a KeyError.
index_levels = ["Player Name", "Club"]
if list(df_copy.index.names) != index_levels:
    df_copy = df_copy.set_index(index_levels)
df_copy.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Chances Created,Assists,Goals,Manager,Annual Salary,Conversion Rate
Player Name,Club,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Cristiano Ronaldo,Manchester United,85,20,35,Ole Gunnar Solskjær,35000000,41.176471
Lionel Messi,Paris Saint-Germain,90,25,30,Mauricio Pochettino,40000000,33.333333
Robert Lewandowski,Bayern Munich,70,15,40,Julian Nagelsmann,25000000,57.142857
Erling Haaland,Borussia Dortmund,60,10,35,Marco Rose,20000000,58.333333
Kylian Mbappé,Paris Saint-Germain,80,18,25,Mauricio Pochettino,30000000,31.25


In [10]:
# Look up Romelu Lukaku's annual salary through his (player, club) index key
lukaku_key = ("Romelu Lukaku", "Chelsea")
salary_lukaku = df_copy.loc[lukaku_key, "Annual Salary"]
print(f"The annual salary of Romelu Lukaku is {salary_lukaku}")

The annual salary of Romelu Lukaku is 27000000


In [11]:
# Unstack the dataset
# Both index levels are unstacked, so the result displays as a single Series
# keyed by (column name, player name, club) rather than as a DataFrame.
df2 = df_copy.unstack(["Player Name", "Club"])
df2.head()

                 Player Name         Club               
Chances Created  Cristiano Ronaldo   Manchester United      85
                 Lionel Messi        Paris Saint-Germain    90
                 Robert Lewandowski  Bayern Munich          70
                 Erling Haaland      Borussia Dortmund      60
                 Kylian Mbappé       Paris Saint-Germain    80
dtype: object

In [12]:
# Sort the index labels of the unstacked data
df2 = df2.sort_index()

# Karim Benzema's chances created: select his entries, then take the first value
benzema_chances = df2.loc[("Chances Created", "Karim Benzema")]
benzema_chances.iloc[0]

75

In [13]:
# Combined salary of Lionel Messi and Kylian Mbappe

# Slice out the "Annual Salary" section once; it is keyed by (player name, club)
annual_salaries = df2.loc["Annual Salary"]

# Each player's first salary entry
messi_salary = annual_salaries.loc["Lionel Messi"].iloc[0]
mbappe_salary = annual_salaries.loc["Kylian Mbappé"].iloc[0]

combined_salary = messi_salary + mbappe_salary
print(f"The combined annual salary of Lionel Messi and Kylian Mbappe is ${combined_salary}")

The combined annual salary of Lionel Messi and Kylian Mbappe is $70000000
