In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk /kaggle/input recursively and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Import all necessary libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.graph_objects as go
# Module for creating blocks
from plotly.subplots import make_subplots
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
print("Setup Complete")

![](http://wallpapercave.com/wp/wp6913.jpg)

# **CONTENTS**
1. Data Visualisation
2. Top 11 players for a Club/Country
3. Dream Team

In [None]:
# Load the player table into `dataset`, the frame the rest of the notebook works on.
dataset=pd.read_csv('/kaggle/input/fifa-20-complete-player-dataset/players_20.csv')

In [None]:
# Preview the first rows of the freshly loaded table.
dataset.head()

# ***DATA EXPLORATION***

**Dropping columns which are not required such as weight, height, dob, player url etc.**

In [None]:
# Columns the analysis does not use: biographical fields, contract details,
# a handful of individual attributes and all per-position rating columns.
del_col_list = [
    'weight_kg', 'height_cm', 'dob', 'sofifa_id', 'player_url', 'long_name',
    'preferred_foot', 'international_reputation', 'weak_foot',
    'team_jersey_number', 'loaned_from', 'joined', 'contract_valid_until',
    'nation_position', 'nation_jersey_number', 'shooting',
    'power_long_shots', 'mentality_aggression', 'mentality_interceptions',
    'mentality_positioning', 'mentality_vision', 'mentality_penalties',
    'mentality_composure', 'defending_marking', 'defending_standing_tackle',
    'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed',
    'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw',
    'lam', 'cam', 'ram', 'lm', 'lcm', 'cm', 'rcm', 'rm',
    'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb',
]
dataset = dataset.drop(columns=del_col_list)
dataset.head()

In [None]:
# Report the frame's shape before and after removing exact duplicate rows
# (the first occurrence of each duplicate is the one that is kept).
shape_before = dataset.shape
dataset = dataset.drop_duplicates(keep='first')
print(shape_before)
print(dataset.shape)

In [None]:
# Per-column count of missing values, largest first, as a tidy table with the
# column name in "Columns" and the count in "Value".
missing_data = (
    dataset.isnull()
    .sum()
    .sort_values(ascending=False)
    .reset_index(drop=False)
    .rename(columns={"index": "Columns", 0: "Value"})
)
# Share of rows (in percent) that are missing for each column.
missing_data['Proportion'] = (missing_data['Value'] / len(dataset)) * 100

In [None]:
# Fill the gaps in these five rating columns with each column's own median,
# then replace every remaining missing value in the frame with 0.
cols = ["dribbling", "defending", "physic", "passing", "pace"]
column_medians = dataset[cols].median()
dataset = dataset.fillna(column_medians).fillna(0)
# Sanity check: per-column count of values that are still missing.
dataset.isnull().sum()

*Sorting the data based on the wage of the players.*

In [None]:
# Five rows with the highest `wage_eur`, highest first (head() defaults to 5 rows).
expensive_dataset = dataset.sort_values(by ='wage_eur', ascending=False).head()
expensive_dataset

In [None]:
# List the columns currently present in the working frame.
dataset.columns

In [None]:
# Players rated 4 or 5 on skill moves, and each nationality's share of them.
skillers = dataset[dataset["skill_moves"].isin([4, 5])]
nation_shares = skillers["nationality"].value_counts(normalize=True)

# Keep the ten largest nationalities and fold everything else into "Other".
# The slice is copied so that adding the extra label does not touch `nation_shares`.
rest = nation_shares.iloc[10:].sum()
skiller_nations = nation_shares.iloc[:10].copy()
skiller_nations["Other"] = rest

pie, ax = plt.subplots(figsize=[12, 12])
labels = skiller_nations.keys()
# `explode` needs one entry per slice; derive its length from the data rather
# than hard-coding 11 so the cell keeps working if fewer nationalities qualify.
ax.pie(
    x=skiller_nations,
    autopct="%.1f%%",
    labels=labels,
    pctdistance=0.5,
    explode=[0.05] * len(skiller_nations),
);
ax.legend(loc="upper right")
ax.set_title("Skill moves and countries", fontsize=14);

This shows that, among players with 4- or 5-star skill moves, the largest share comes from Brazil, followed by Spain and Argentina.

![](http://i.ytimg.com/vi/NOras8n86Fo/maxresdefault.jpg)

# **L. Messi Vs Cristiano Ronaldo**

In [None]:
def find_min_max_in(col, data=None):
    """Return the rows holding the largest and the smallest value of `col`.

    Parameters
    ----------
    col : str
        Name of the column to search.
    data : pandas.DataFrame, optional
        Frame to search. Defaults to the notebook-level `dataset`.

    Returns
    -------
    pandas.DataFrame
        Two columns, labelled with the index of the max row and of the min
        row (in that order); the rows are the fields of the source frame.
    """
    frame = dataset if data is None else data

    # Look up the rows actually found by idxmax/idxmin. Using fixed labels 0
    # and 1 here would ignore `col` entirely and raise KeyError as soon as one
    # of those rows has been dropped from the frame.
    top = frame[col].idxmax()
    top_df = pd.DataFrame(frame.loc[top])

    bottom = frame[col].idxmin()
    bottom_df = pd.DataFrame(frame.loc[bottom])

    info_df = pd.concat([top_df, bottom_df], axis=1)
    return info_df

# Show the helper's result for the wage column (rendered as the cell's last expression).
find_min_max_in('wage_eur')

**Analysing various players based on their potential**

In [None]:
# Five rows with the highest `potential`, highest first (head() defaults to 5 rows).
potential_attribute = dataset.sort_values(by ='potential', ascending=False).head()
potential_attribute

In [None]:
# Wage on the x-axis against potential on the y-axis, one point per row.
sns.scatterplot(x=dataset['wage_eur'], y=dataset['potential'])

**Listing players with a potential of 93 or higher**

In [None]:
# Rows whose potential is at least 93 (the comparison is inclusive).
dataset[dataset['potential'] >= 93]

In [None]:
def create_polarcharts(
    stats: list,
    color: str,
    img_link: str,
    name_one: str,
    name_two: str
):
    '''
    Build and display a two-column Plotly figure for one footballer: an empty
    xy panel in the first column, a filled polar chart of the player's
    attributes in the second, and an image placed underneath the plot.

        stats    - numeric attribute values, one per label in `axis_labels`
        color    - line colour of the polar trace
        img_link - link to an image of the footballer
        name_one - footballer's name, shown in bold as the figure title
        name_two - additional text, shown as a subtitle under the name

    The figure is shown with fig.show(); nothing is returned.
    '''
    # Shared background colour for the page and the polar panel.
    background = "rgb(205, 228, 255)"
    # The first label is repeated at the end (presumably so the outline closes).
    axis_labels = ['Pace', 'Shooting', 'Passing', 'Dribbling', 'Defending', 'Physic', 'Pace']

    # One row, two equally wide columns: plain xy axes, then polar axes.
    fig = make_subplots(
        rows=1,
        cols=2,
        specs=[[{'type': 'xy'}, {"type": "polar"}]],
        column_widths=[0.5, 0.5],
    )

    # Filled polar trace, placed in the second column.
    radar_trace = go.Scatterpolar(
        r=stats,
        theta=axis_labels,
        fill='toself',
        # Hover text: label in bold, then the value.
        hovertemplate='<b>%{theta}</b>' + '<b>: ' + '%{r}',
        # Empty legend caption.
        name='',
        line=dict(color=color),
    )
    fig.add_trace(radar_trace, row=1, col=2)

    # Player image: centred on (0.05, 0.5) and drawn below the chart layer.
    player_image = dict(
        source=img_link,
        x=0.05,
        y=0.5,
        sizex=1,
        sizey=1.6,
        xanchor="center",
        yanchor="middle",
        layer="below",
    )
    fig.layout.images = [player_image]

    # Radial axis is visible and fixed to the 0-100 range.
    polar_settings = dict(
        bgcolor=background,
        radialaxis=dict(visible=True, range=[0, 100]),
    )
    font_settings = dict(family='Poppins', size=18, color='Black')

    fig.update_layout(
        title=f'<b>{name_one}</b><br><sub>{name_two}</sub>',
        paper_bgcolor=background,
        template='xgridoff',
        polar=polar_settings,
        font=font_settings,
    )

    # Displaying the graph
    fig.show()

In [None]:
# Polar chart for L. Messi. The seven values line up with the labels used inside
# create_polarcharts (Pace, Shooting, Passing, Dribbling, Defending, Physic, Pace),
# so the last value repeats the first.
# NOTE(review): the numbers are hard-coded rather than read from `dataset` — confirm the source.
create_polarcharts([85, 92, 91, 95, 38, 65, 85], 
                   'MidnightBlue',
                   'https://lh3.googleusercontent.com/proxy/fn0SPZLOyA0QQIPYN0EXPomGEVu7KtwvS5TrajsxrCLeR1aiSp2kSpDSqyilCzKgsH3ACCEA7Yb1O1xkVPIhzQzxcBvYRn1BHJUWYMDZQdRvNt0NRm4fFCMfo6iV5hJErZcUOA',
                   'L. Messi',
                   'Forward in FC Barcelona')

In [None]:
# Box plots of six skill/movement rating columns taken from `dataset` itself.
# Plotting np.random.rand(10, 6) under these column names would show random
# noise, not player data.
graph1 = dataset[
    [
        "skill_curve",
        "skill_fk_accuracy",
        "skill_long_passing",
        "movement_agility",
        "movement_reactions",
        "movement_balance",
    ]
]
graph1.plot.box();

In [None]:
# Pairwise correlations between a few headline columns, drawn as an annotated
# heatmap on a fixed [-1, 1] colour scale centred on 0.
heatmap_columns = ['age', 'overall', 'potential', 'skill_moves', 'value_eur', 'wage_eur', 'passing']
correlations = dataset[heatmap_columns].corr()
sns.heatmap(data=correlations, annot=True, vmin=-1, vmax=1, center=0, cmap="YlGnBu")

In [None]:
# Explore visually the relationship between 'age' and 'overall' through a scatterplot
fig, ax = plt.subplots(figsize=(15,8))
ax.scatter(dataset['age'], dataset['overall'], color='violet')
ax.set_xlabel('age')
ax.set_ylabel('overall')
plt.show()

In [None]:
# List the columns currently present in the working frame.
dataset.columns

In [None]:
# Player count and mean `physic` for every age, computed in a single grouped
# pass instead of re-filtering the whole frame once per age. groupby returns
# the ages in ascending order.
age_stats = dataset.groupby("age")["physic"].agg(["count", "mean"])
unique_ages = age_stats.index.tolist()
counts = age_stats["count"].tolist()
# NOTE: despite its name, `paces` holds the mean *physic* per age. The name is
# kept because the next plotting cell reads it.
paces = age_stats["mean"].tolist()

# Number of players at each age; marker size scales with the count.
sns.set_style("white")
plt.figure(figsize=(8, 8));
sns.scatterplot(x=unique_ages, y=counts, color="red", size=counts);
sns.despine()
plt.title("Frequency of Ages");
plt.xlabel("Ages");
plt.ylabel("Count");

This shows that most of the football players tend to be in the 20-30 age group, which naturally decreases as player age increases.

In [None]:
# Mean physic rating for each age; marker size scales with the mean.
fig, ax = plt.subplots(figsize=(8, 8))
sns.scatterplot(x=unique_ages, y=paces, size=paces, color="magenta", ax=ax);
sns.despine()
ax.set_title("Age vs Average Physic");
ax.set_xlabel("Ages");
ax.set_ylabel("Average Physic");

This shows that players' average physic rating tends to increase as they grow older.

# **Figure for the best players in a club/country**

![](http://content.api.news/v3/images/bin/3e71dbd6f726c5240767dcda0ba0c465)

In [None]:
def Club(x):
    """Return the players of club `x` from `dataset`, highest potential first.

    The result keeps only the name, age, club, positions, potential and value
    columns.
    """
    wanted = ['short_name', 'age', 'club', "player_positions", "potential", "value_eur"]
    in_club = dataset['club'] == x
    squad = dataset.loc[in_club, wanted]
    return squad.sort_values(by=['potential'], ascending=False)
def Country(x):
    """Return the players of nationality `x` from `dataset`, highest potential first.

    The result keeps only the name, age, nationality, positions, potential and
    value columns.
    """
    wanted = ['short_name', 'age', 'nationality', "player_positions", "potential", "value_eur"]
    from_country = dataset['nationality'] == x
    squad = dataset.loc[from_country, wanted]
    return squad.sort_values(by=['potential'], ascending=False)


In [None]:
# Potential-sorted player tables for the clubs and national teams of interest.
Chelsea = Club("Chelsea")
PSG = Club("Paris Saint-Germain")
Juventus = Club("Juventus")
BVB = Club("Borussia Dortmund")
Germany = Country("Germany")
England = Country("England")
France = Country("France")

In [None]:
def plot_top_players(players, title, color, top_n=11):
    """Draw a bar chart of `potential` for the first `top_n` rows of `players`.

    Parameters
    ----------
    players : pandas.DataFrame
        Table with `short_name` and `potential` columns, already sorted so the
        rows to show come first.
    title : str
        Figure title.
    color : str
        Matplotlib colour for the bars.
    top_n : int, optional
        Number of leading rows to plot (default 11).
    """
    top = players.head(top_n)
    plt.figure(figsize=(30, 12))
    plt.bar(top["short_name"], top["potential"], color=color)
    plt.xticks(rotation=90, fontsize=20)
    plt.yticks(fontsize=20)
    plt.title(title, fontsize=25)
    plt.ylabel("potential", fontsize=20)
    plt.xlabel("Player", fontsize=20)
    plt.grid()
    plt.show()


# One chart per squad; only the data, title and colour differ.
plot_top_players(Chelsea, "Chelsea players", "b")
plot_top_players(Juventus, "Juventus players", "k")
plot_top_players(Germany, "Top 11 German players", "r")
plot_top_players(France, "Top 11 French players", "g")

In [None]:
# NOTE(review): 18278 looks like the full row count, so this requests every row — confirm; a plain .head() is usually enough for a preview.
dataset.player_positions.head(18278)

In [None]:
# NOTE(review): 18278 looks like the full row count, so this requests every row — confirm; a plain .head() is usually enough for a preview.
dataset.skill_moves.head(18278)

In [None]:
# NOTE(review): 18278 looks like the full row count, so this requests every row — confirm; a plain .head() is usually enough for a preview.
dataset.goalkeeping_handling.head(18278)

# **MAKING DREAM TEAM FOR ARGENTINA**

![](http://www.sdxcentral.com/wp-content/uploads/2020/09/Intel-AMD-Nvidia-and-Ampere-Join-Oracle-Cloud-Dream-Team.jpg)

**Starting with goalkeeper**

In [None]:
def Country(x):
    """Rank the players of nationality `x` by `goalkeeping_handling`, best first.

    Reads the notebook-level `dataset` and returns a frame with the
    `goalkeeping_handling` and `short_name` columns.
    """
    candidates = dataset.loc[dataset['nationality'] == x, ['goalkeeping_handling', 'short_name']]
    return candidates.sort_values(by=['goalkeeping_handling'], ascending=False)


Country1 = Country("Argentina")
print(Country1)
# Read the name by column label: positional `row[1]` on a label-indexed Series
# is deprecated in pandas.
goalkeeper = Country1.iloc[0]['short_name']
print("The goalkeeper is :", goalkeeper)
# Remove exactly the selected row so this player cannot be picked again for
# another position. Filtering on short_name would also delete unrelated
# players who happen to share the name.
dataset = dataset.drop(index=Country1.index[0])
  

Each selected player is removed from the dataset so that no player appears more than once in the final team list.

**Centreback**

In [None]:
# Centre-back score: the plain sum of the six ratings below (despite the
# `_mean` suffix, no division is applied). skipna=False keeps the behaviour of
# `+`, i.e. a missing rating makes the whole score missing.
centreback_columns = [
    'movement_agility',
    'pace',
    'movement_reactions',
    'passing',
    'defending_sliding_tackle',
    'defending',
]
dataset['centreback_mean'] = dataset[centreback_columns].sum(axis=1, skipna=False)
dataset

In [None]:
def Country(x):
    """Rank the players of nationality `x` by `centreback_mean`, best first.

    Reads the notebook-level `dataset` and returns a frame with the
    `centreback_mean` and `short_name` columns.
    """
    candidates = dataset.loc[dataset['nationality'] == x, ['centreback_mean', 'short_name']]
    return candidates.sort_values(by=['centreback_mean'], ascending=False)


Country1 = Country("Argentina")
print(Country1)
# Read the name by column label: positional `row[1]` on a label-indexed Series
# is deprecated in pandas.
centreback = Country1.iloc[0]['short_name']
print("The centreback is :", centreback)
# Remove exactly the selected row so this player cannot be picked again;
# filtering on short_name would also delete unrelated same-named players.
dataset = dataset.drop(index=Country1.index[0])

**Centremidfielder**

In [None]:
# Centre-midfielder score: the plain sum of the eight ratings below (despite
# the `_mean` suffix, no division is applied). skipna=False keeps the behaviour
# of `+`, i.e. a missing rating makes the whole score missing.
centremidfielder_columns = [
    'dribbling',
    'passing',
    'pace',
    'movement_agility',
    'movement_reactions',
    'movement_balance',
    'attacking_short_passing',
    'attacking_heading_accuracy',
]
dataset['centremidfielder_mean'] = dataset[centremidfielder_columns].sum(axis=1, skipna=False)
dataset

In [None]:
def Country(x):
    """Rank the players of nationality `x` by `centremidfielder_mean`, best first.

    Reads the notebook-level `dataset` and returns a frame with the
    `centremidfielder_mean` and `short_name` columns.
    """
    candidates = dataset.loc[dataset['nationality'] == x, ['centremidfielder_mean', 'short_name']]
    return candidates.sort_values(by=['centremidfielder_mean'], ascending=False)


Country1 = Country("Argentina")
print(Country1)
# Read the names by column label: positional `row[1]` on a label-indexed
# Series is deprecated in pandas.
centremidfielder1 = Country1.iloc[0]['short_name']
print("The centremidfielder one is :", centremidfielder1)
centremidfielder2 = Country1.iloc[1]['short_name']
print("The centremidfielder two is :", centremidfielder2)
centremidfielder3 = Country1.iloc[2]['short_name']
print("The centremidfielder three is :", centremidfielder3)
# Remove exactly the three selected rows so these players cannot be picked
# again; filtering on short_name would also delete unrelated same-named players.
dataset = dataset.drop(index=Country1.index[:3])

**Attacking Midfielder**

In [None]:
# Attacking-midfielder score: the plain sum of the seven ratings below (despite
# the `_mean` suffix, no division is applied). skipna=False keeps the behaviour
# of `+`, i.e. a missing rating makes the whole score missing.
attackingmidfielder_columns = [
    'attacking_finishing',
    'passing',
    'pace',
    'attacking_heading_accuracy',
    'attacking_short_passing',
    'attacking_volleys',
    'skill_curve',
]
dataset['attackingmidfielder_mean'] = dataset[attackingmidfielder_columns].sum(axis=1, skipna=False)
dataset

In [None]:
def Country(x):
    """Rank the players of nationality `x` by `attackingmidfielder_mean`, best first.

    Reads the notebook-level `dataset` and returns a frame with the
    `attackingmidfielder_mean` and `short_name` columns.
    """
    candidates = dataset.loc[dataset['nationality'] == x, ['attackingmidfielder_mean', 'short_name']]
    return candidates.sort_values(by=['attackingmidfielder_mean'], ascending=False)


Country1 = Country("Argentina")
print(Country1)
# Read the names by column label: positional `row[1]` on a label-indexed
# Series is deprecated in pandas.
attackingmidfielder1 = Country1.iloc[0]['short_name']
print("The attackingmidfielder one is :", attackingmidfielder1)
attackingmidfielder2 = Country1.iloc[1]['short_name']
print("The attackingmidfielder two is :", attackingmidfielder2)
# Remove exactly the two selected rows so these players cannot be picked
# again; filtering on short_name would also delete unrelated same-named players.
dataset = dataset.drop(index=Country1.index[:2])

**Fullback**

In [None]:
# Full-back score: the plain sum of the five ratings below (despite the `_mean`
# suffix, no division is applied). skipna=False keeps the behaviour of `+`,
# i.e. a missing rating makes the whole score missing.
fullback_columns = [
    'defending_sliding_tackle',
    'passing',
    'defending',
    'skill_long_passing',
    'power_strength',
]
dataset['fullback_mean'] = dataset[fullback_columns].sum(axis=1, skipna=False)
dataset

In [None]:
def Country(x):
    """Rank the players of nationality `x` by `fullback_mean`, best first.

    Reads the notebook-level `dataset` and returns a frame with the
    `fullback_mean` and `short_name` columns.
    """
    candidates = dataset.loc[dataset['nationality'] == x, ['fullback_mean', 'short_name']]
    return candidates.sort_values(by=['fullback_mean'], ascending=False)


Country1 = Country("Argentina")
print(Country1)
# Read the names by column label: positional `row[1]` on a label-indexed
# Series is deprecated in pandas.
fullback1 = Country1.iloc[0]['short_name']
print("The fullback one is :", fullback1)
fullback2 = Country1.iloc[1]['short_name']
print("The fullback two is :", fullback2)
fullback3 = Country1.iloc[2]['short_name']
print("The fullback three is :", fullback3)

# Remove exactly the three selected rows so these players cannot be picked
# again; filtering on short_name would also delete unrelated same-named players.
dataset = dataset.drop(index=Country1.index[:3])

**Striker**

In [None]:
# Striker score: the plain sum of the seven ratings below (despite the `_mean`
# suffix, no division is applied). skipna=False keeps the behaviour of `+`,
# i.e. a missing rating makes the whole score missing.
stricker_columns = [
    'attacking_finishing',
    'attacking_heading_accuracy',
    'attacking_crossing',
    'pace',
    'skill_curve',
    'skill_fk_accuracy',
    'movement_balance',
]
dataset['stricker_mean'] = dataset[stricker_columns].sum(axis=1, skipna=False)
dataset

In [None]:
def Country(x):
    """Rank the players of nationality `x` by `stricker_mean`, best first.

    Reads the notebook-level `dataset` and returns a frame with the
    `stricker_mean` and `short_name` columns.
    """
    candidates = dataset.loc[dataset['nationality'] == x, ['stricker_mean', 'short_name']]
    return candidates.sort_values(by=['stricker_mean'], ascending=False)


Country1 = Country("Argentina")
print(Country1)
# Read the name by column label: positional `row[1]` on a label-indexed Series
# is deprecated in pandas.
stricker = Country1.iloc[0]['short_name']
print("The stricker is :", stricker)
# Remove exactly the selected row so this player cannot be picked again;
# filtering on short_name would also delete unrelated same-named players.
dataset = dataset.drop(index=Country1.index[0])

# **FINAL DREAM TEAM**

In [None]:
# Print the eleven selected names on one line: striker, three full-backs, goalkeeper,
# three centre midfielders, two attacking midfielders and the centre-back.
print("The team is ", stricker, fullback1, fullback2, fullback3, goalkeeper, centremidfielder1, centremidfielder2, centremidfielder3, attackingmidfielder1, attackingmidfielder2, centreback)