# Football Player Analysis
This notebook analyses football players from the United States, Portugal, Argentina, Brazil, Spain, and England, comparing their overall ratings, heights, weights, and market values.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the players CSV into `df`, the raw DataFrame the rest of the notebook starts from.
# NOTE(review): this is an absolute, environment-specific path — adjust it when
# running the notebook somewhere else.
file_path = '/mnt/data/players_21.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top rows to see which columns are available
df.head()

In [2]:
# Countries this analysis is restricted to
selected_countries = [
    'United States',
    'Portugal',
    'Argentina',
    'Brazil',
    'Spain',
    'England',
]

# Keep only the rows whose 'nationality' is one of the selected countries
filtered_df = df.loc[lambda players: players['nationality'].isin(selected_countries)]

# Preview the filtered rows
filtered_df.head()

## Bar Plot for 'Overall' Ratings

In [3]:
# Bar plot of the average 'overall' rating per selected country.
#
# The per-country mean is computed up front with pandas instead of inside
# seaborn. That removes the hand-written `sum(x)/len(x)` estimator and the
# `ci=None` argument, which seaborn deprecated in 0.12 in favour of `errorbar`.
# With a single (already averaged) value per country there is nothing to
# bootstrap, so no error bars are expected on the bars.
#
# sort=False keeps countries in order of first appearance in `filtered_df`,
# which should match the bar order seaborn derives from the raw rows for a
# plain string column — NOTE(review): confirm if 'nationality' is ever made categorical.
overall_by_country = filtered_df.groupby('nationality', sort=False, as_index=False)['overall'].mean()

plt.figure(figsize=(12, 6))
sns.barplot(x='nationality', y='overall', data=overall_by_country)
plt.title('Average Overall Ratings of Football Players by Country')
plt.xlabel('Country')
plt.ylabel('Average Overall Rating')
plt.show()

## Histogram and Box Plot for Player Heights

In [4]:
# Player heights per selected country, shown two ways side by side:
# a per-country density histogram (left) and a box plot (right).

# One row, two panels
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(18, 6))

# Left panel: step histogram of heights, one outline per country.
# common_norm=False normalises each country's density on its own.
sns.histplot(
    data=filtered_df,
    x='height_cm',
    hue='nationality',
    element='step',
    stat='density',
    common_norm=False,
    ax=axes[0],
)
axes[0].set(
    title='Distribution of Player Heights by Country',
    xlabel='Height (cm)',
    ylabel='Density',
)

# Right panel: one box per country
sns.boxplot(data=filtered_df, x='nationality', y='height_cm', ax=axes[1])
axes[1].set(
    title='Boxplot of Player Heights by Country',
    xlabel='Country',
    ylabel='Height (cm)',
)

fig.tight_layout()
plt.show()

## Scatter Plot for Player Weights

In [5]:
# Scatter plot of player weight against country for the selected countries.
# Each country is both an x position and a hue, so every column of points
# gets its own colour from the 'Set1' palette.

plt.figure(figsize=(12, 6))
weight_ax = sns.scatterplot(
    data=filtered_df,
    x='nationality',
    y='weight_kg',
    hue='nationality',
    palette='Set1',
)
weight_ax.set_title('Distribution of Player Weights by Country')
weight_ax.set_xlabel('Country')
weight_ax.set_ylabel('Weight (kg)')
# Anchor the legend just outside the right edge of the axes
weight_ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

## Pie Chart for Top 10 Most Valuable Players

In [6]:
# Pie chart of the ten highest-valued players among the selected countries.
# Each slice is one player's share of the combined 'value_eur' of those ten.

# Order by 'value_eur' from highest to lowest and keep the first ten rows
top_10_valuable_players = filtered_df.sort_values(by='value_eur', ascending=False).iloc[:10]

# Draw the pie, labelling each slice with the player's short name and percentage
plt.figure(figsize=(10, 8))
plt.pie(
    x=top_10_valuable_players['value_eur'],
    labels=top_10_valuable_players['short_name'],
    autopct='%1.1f%%',
    startangle=90,
)
plt.title('Top 10 Most Valuable Players from Selected Countries')
plt.axis('equal')  # equal axis scaling so the pie renders as a circle

plt.show()