In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score
import warnings
warnings.filterwarnings('ignore')

# Notebook-wide settings, gathered in one place so they are easy to tune.
PLOT_STYLE = 'seaborn-v0_8-darkgrid'
COLOR_PALETTE = "husl"
DATA_FILE = 'mlbb_heroes_aggregated.csv'

# Apply the matplotlib style sheet first, then the seaborn palette
# (same call order as before; do not swap them).
plt.style.use(PLOT_STYLE)
sns.set_palette(COLOR_PALETTE)

# Read the aggregated hero statistics into the working DataFrame.
df = pd.read_csv(DATA_FILE)

# Calculate additional features
def _pct_of(numerator, denominator):
    """Return ``numerator / denominator * 100`` element-wise, with 0.0 where the denominator is 0.

    A zero denominator would otherwise produce NaN (0/0) or inf (x/0), which
    then flows into ``performance_score``. Rows whose denominator is itself
    missing (NaN) stay NaN, so absent data is not silently turned into 0.
    """
    nonzero = denominator != 0
    return (numerator / denominator.where(nonzero) * 100).where(nonzero, 0.0)


# Picks + bans per hero (the number of drafts the hero appeared in one way or the other).
df['total_matches'] = df['total_picks'] + df['total_bans']
# Scaled against the hero with the most picks + bans, so the top hero is 100.
df['pick_ban_rate'] = df['total_matches'] / df['total_matches'].max() * 100
# Share of a hero's appearances that were bans; 0 for heroes that never appeared.
df['ban_rate'] = _pct_of(df['total_bans'], df['total_matches'])
# Wins per pick in percent; 0 for heroes that were never picked.
df['win_rate_per_pick'] = _pct_of(df['total_wins'], df['total_picks'])
# Weighted blend: 40% win rate, 30% pick volume relative to the most-picked hero, 30% ban rate.
# NOTE(review): assumes overall_win_rate is on a 0-100 scale like the other two terms - confirm.
df['performance_score'] = (df['overall_win_rate'] * 0.4 + 
                          (df['total_picks'] / df['total_picks'].max() * 100) * 0.3 + 
                          (df['ban_rate']) * 0.3)

# Display basic info: headline dataset statistics, the role breakdown,
# the observed metric ranges, a preview of the rows and a null count.
print("=" * 80)
print("MLBB HERO TOURNAMENT PERFORMANCE ANALYSIS (2018-2025)")
print("=" * 80)
print("\n📊 Dataset Overview:")
print(f"   • Total Heroes Analyzed: {df.shape[0]:,}")
print("   • Time Period: 2018-2025 Tournaments")
print(f"   • Total Matches (Picks + Bans): {df['total_matches'].sum():,}")
# Pooled figure (all wins / all picks), i.e. weighted by pick count rather than
# a plain mean of the per-hero win rates.
print(f"   • Average Win Rate Across All Heroes: {(df['total_wins'].sum() / df['total_picks'].sum() * 100):.2f}%")

print("\n🎮 Role Distribution:")
role_counts = df['Primary_Role'].value_counts()
for role, count in role_counts.items():
    # Percentage is taken over all rows, including any with a missing role.
    percentage = (count / len(df)) * 100
    print(f"   • {role}: {count} heroes ({percentage:.1f}%)")

print("\n📈 Performance Metrics Range:")
print(f"   • Win Rate: {df['overall_win_rate'].min():.1f}% - {df['overall_win_rate'].max():.1f}%")
print(f"   • Total Picks: {df['total_picks'].min():,} - {df['total_picks'].max():,}")
print(f"   • Total Bans: {df['total_bans'].min():,} - {df['total_bans'].max():,}")

print("\nFirst 10 rows of dataset:")
print(df.head(10))
print(f"\nDataset Shape: {df.shape}")
# Total count of null cells across every column (including the derived ones).
print(f"\nMissing values: {df.isnull().sum().sum()}")

In [None]:
# Role distribution analysis: hero count per primary role, most common first.
role_counts = df['Primary_Role'].value_counts()

# One row of three panels shared by the role charts in this cell.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 6))
panel_title_style = dict(fontsize=14, fontweight='bold')

# Panel 1 (pie): each role's share of the hero pool.
colors1 = plt.cm.Set3(np.linspace(0, 1, role_counts.size))
share_ax = axes[0]
share_ax.pie(
    role_counts.to_numpy(),
    labels=role_counts.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=colors1,
    textprops={'fontsize': 10},
)
share_ax.set_title('Hero Role Distribution', **panel_title_style)
# Equal aspect ratio keeps the pie circular.
share_ax.axis('equal')

# Bar chart: role-wise average win rate.
# Drawn as bars rather than a pie: a pie would label every wedge with that
# role's share of the *sum of the averages*, which looks like a win rate but
# is not one. Each bar is annotated with the mean itself instead.
role_avg_winrate = df.groupby('Primary_Role')['overall_win_rate'].mean().sort_values()
colors2 = plt.cm.Paired(np.linspace(0, 1, len(role_avg_winrate)))
winrate_bars = axes[1].barh(role_avg_winrate.index, role_avg_winrate.values, color=colors2)
# NOTE(review): the '%' suffix assumes overall_win_rate is already on a 0-100 scale - confirm.
axes[1].bar_label(winrate_bars, fmt='%.1f%%', padding=3, fontsize=10)
axes[1].set_title('Average Win Rate by Role', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Average Win Rate (%)')
axes[1].set_ylabel('Role')

# Bar plot: role-wise hero count.
# Drawn with matplotlib directly and an explicit viridis colour per bar:
# passing `palette` to sns.barplot without `hue` is deprecated in recent
# seaborn releases, and warnings are silenced in this notebook, so the
# deprecation would go unnoticed until the call stops working.
count_bars = axes[2].bar(
    role_counts.index,
    role_counts.values,
    color=sns.color_palette('viridis', n_colors=len(role_counts)),
)
axes[2].set_title('Number of Heroes per Role', fontsize=14, fontweight='bold')
axes[2].set_xlabel('Role')
axes[2].set_ylabel('Count')
axes[2].tick_params(axis='x', rotation=45)
# Print each count just above its bar.
axes[2].bar_label(count_bars, padding=3, fontweight='bold')

plt.tight_layout()
plt.show()

# Text report: one summary paragraph per primary role.
section_rule = "=" * 70
print("\n" + section_rule)
print("ROLE ANALYSIS")
print(section_rule)

# Roles are visited in order of first appearance in the data.
for role in df['Primary_Role'].unique():
    role_data = df.loc[df['Primary_Role'].eq(role)]
    # Build the whole paragraph first, then emit it with a single print.
    report_lines = [
        f"\n{role}:",
        f"  Number of heroes: {len(role_data):>3}",
        f"  Average win rate: {role_data['overall_win_rate'].mean():>6.2f}%",
        f"  Average picks:    {role_data['total_picks'].mean():>6.0f}",
        f"  Average bans:     {role_data['total_bans'].mean():>6.0f}",
        f"  Total matches:    {role_data['total_matches'].sum():>6,}",
    ]
    print("\n".join(report_lines))