In [2]:
import pandas as pd
import plotly.express as px

# Load FIFA 22 player data
df = pd.read_csv('FIFA22_official_data.csv')

# Filter players with a Best Overall Rating greater than 85
df_filtered = df[df['Best Overall Rating'] > 85]

# Scatter plot of Best Overall Rating on a world map
fig_map = px.scatter_geo(df_filtered,
                         locations='Nationality',
                         locationmode='country names',
                         color='Best Overall Rating',
                         size='Overall',  
                         hover_name='Name',
                         title='Best Overall Rating of Players (Rating > 85)',
                         color_continuous_scale=px.colors.sequential.Viridis,  
                         labels={'Best Overall Rating': 'Rating'},  
                         template='plotly_dark',
                         projection='natural earth'  
                         )

fig_map.update_traces(
    mode='markers',
    marker=dict(line=dict(width=0.5, color='White')),
)

fig_map.update_layout(coloraxis_colorbar=dict(title='Player Ratings'))

fig_map.update_layout(
    title='Best Overall Rating of Players (Rating > 85)',
    font=dict(family="Arial", size=18, color="white"),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    margin=dict(l=0, r=0, t=50, b=0),
)

# Display map and provide conclusion
fig_map.show()

# Conclusion 1
player_a = df_filtered.loc[df_filtered['Name'] == 'Player A']
if not player_a.empty:
    print("Conclusion 1: The geographical distribution of high-rated players (Rating > 85) shows that top-rated players are spread across various countries.")
    print(f"- Notable player: '{player_a['Name'].values[0]}' from {player_a['Nationality'].values[0]} has one of the highest overall ratings.")
else:
    print("Conclusion 1: The geographical distribution of high-rated players (Rating > 85) shows that top-rated players are spread across various countries.")
   

# Player distribution by position
position_counts = df_filtered['Position'].value_counts().reset_index()
position_counts.columns = ['Position', 'Count']

fig_position = px.bar(position_counts, x='Position', y='Count', 
                      title='Distribution of High-Rated Players by Position',
                      labels={'Position': 'Position', 'Count': 'Number of Players'},
                      template='plotly_dark')

# Display bar chart and provide conclusion
fig_position.show()     
player_b = df_filtered.loc[df_filtered['Position'] == 'Forward'].head(1)
print("Conclusion 2: The bar chart displaying the distribution of high-rated players by position indicates that certain positions have a higher concentration of top-rated players.")

if not player_b.empty:
    print(f"- Notable player: '{player_b['Name'].values[0]}' excels in the 'Forward' position.")
else:
    print("")

# Wage vs. Overall Rating scatter plot
fig_wage_rating = px.scatter(df_filtered, x='Overall', y='Wage',
                             title='Wage vs. Overall Rating for High-Rated Players',
                             labels={'Overall': 'Overall Rating', 'Wage': 'Wage'},
                             template='plotly_dark')
fig_wage_rating.update_traces(marker=dict(size=8, opacity=0.8))

# Display scatter plot and provide conclusion
fig_wage_rating.show()
players_cd = df_filtered.loc[df_filtered['Overall'] > 90].head(2)
print("Conclusion 3: The scatter plot of Wage vs. Overall Rating reveals that some players with high overall ratings also command high wages, suggesting a correlation between skill level and salary.")

print("Important conclusion-This shows that inspite of same ratings players are getting different wages")
if not players_cd.empty:
    print(f"- Notable players: '{players_cd['Name'].values[0]}' and '{players_cd['Name'].values[1]}' have high wages and ratings.")
else:
    print("- No specific player information available for high-rated players in the dataset.")

# Age distribution of high-rated players
fig_age_distribution = px.histogram(df_filtered, x='Age',
                                   title='Age Distribution of High-Rated Players',
                                   labels={'Age': 'Age', 'count': 'Number of Players'},
                                   template='plotly_dark')

# Display age distribution chart and provide conclusion
fig_age_distribution.show()
print("Conclusion 4: The age histogram of high-rated players shows that there is a diverse age range among top-rated players, with a concentration around a certain age group.")
#print("- Notable trend: There is a significant number of top-rated players in the age group 25-30.")

# Distribution of high-rated players by nationality
nationality_counts = df_filtered['Nationality'].value_counts().reset_index()
nationality_counts.columns = ['Nationality', 'Count']

fig_nationality = px.pie(nationality_counts, values='Count', names='Nationality',
                         title='Distribution of High-Rated Players by Nationality',
                         template='plotly_dark')

# Display nationality pie chart and provide conclusion
fig_nationality.show()
print("Conclusion 5: The pie chart illustrates the distribution of high-rated players by nationality, highlighting the diversity of nationalities among top-rated players.")
print("- Notable insight: 'France' has a significant representation of high-rated players.")

# Top clubs with high-rated players
top_clubs = df_filtered['Club'].value_counts().reset_index().head(10)
top_clubs.columns = ['Club', 'Count']

fig_top_clubs = px.bar(top_clubs, x='Club', y='Count',
                       title='Top Clubs with High-Rated Players',
                       labels={'Club': 'Club', 'Count': 'Number of High-Rated Players'},
                       template='plotly_dark')

# Display top clubs bar chart and provide conclusion
fig_top_clubs.show()
print("Conclusion 6: The bar chart of the top clubs with high-rated players indicates the clubs with the most representation of top-rated players, showcasing the teams with strong player rosters.")
print("- Notable clubs: 'Manchester city' and 'Paris Saint Germain' have the highest number of high-rated players.")

Conclusion 1: The geographical distribution of high-rated players (Rating > 85) shows that top-rated players are spread across various countries.


Conclusion 2: The bar chart displaying the distribution of high-rated players by position indicates that certain positions have a higher concentration of top-rated players.



Conclusion 3: The scatter plot of Wage vs. Overall Rating reveals that some players with high overall ratings also command high wages, suggesting a correlation between skill level and salary.
Important conclusion-This shows that inspite of same ratings players are getting different wages
- Notable players: 'K. De Bruyne' and 'L. Messi' have high wages and ratings.


Conclusion 4: The age histogram of high-rated players shows that there is a diverse age range among top-rated players, with a concentration around a certain age group.


Conclusion 5: The pie chart illustrates the distribution of high-rated players by nationality, highlighting the diversity of nationalities among top-rated players.
- Notable insight: 'France' has a significant representation of high-rated players.


Conclusion 6: The bar chart of the top clubs with high-rated players indicates the clubs with the most representation of top-rated players, showcasing the teams with strong player rosters.
- Notable clubs: 'Manchester city' and 'Paris Saint Germain' have the highest number of high-rated players.
