# üá¨üá≠ Ghana Black Stars Cluster Analysis

This notebook analyzes Ghana's attacking players to understand:
- Which clusters they belong to
- Their statistical profiles
- How they compare to peers in their clusters
- Squad composition and gap analysis

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
from math import pi

# mplsoccer for professional football radar charts
try:
    from mplsoccer import Radar, FontManager, grid
    MPLSOCCER_AVAILABLE = True
    print('‚úÖ mplsoccer loaded - using professional radar charts')
except ImportError:
    MPLSOCCER_AVAILABLE = False
    print('‚ö†Ô∏è mplsoccer not installed. Install with: pip install mplsoccer')
    print('   Falling back to matplotlib radar charts')

import warnings
warnings.filterwarnings('ignore')

# Project directories, given relative to the notebook's working directory.
DATA_DIR = Path("../data")
PROCESSED_DIR = DATA_DIR / "processed"  # the clustered forwards CSV is read from here
OUTPUT_DIR = Path("../outputs")  # model pickle is read from here; figures and the analysis CSV are written here
# Create the output folder up front so later savefig/to_csv calls have a target.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print("‚úÖ Libraries loaded!")

## 1. Configuration - Ghana Players

In [None]:
# Ghana Black Stars Forwards (with common name variations for matching)
# Keys are the canonical display names (stored later in the 'ghana_name' column);
# each value lists alternative spellings that find_player() tries alongside the
# canonical name.
# NOTE(review): bare surnames such as "Ayew" or "Williams" are also used for
# substring matching, so they can hit a different player who shares the surname.
# Check the "Found as" lines printed by the search cell.
GHANA_PLAYERS = {
    "Mohammed Kudus": ["Kudus", "M. Kudus", "Mohammed Kudus"],
    "Antoine Semenyo": ["Semenyo", "A. Semenyo", "Antoine Semenyo"],
    "Jordan Ayew": ["J. Ayew", "Jordan Ayew", "Ayew"],
    "Ernest Nuamah": ["Nuamah", "E. Nuamah", "Ernest Nuamah"],
    "Osman Bukari": ["Bukari", "O. Bukari", "Osman Bukari"],
    "Fatawu Issahaku": ["Fatawu", "Abdul Fatawu", "A. Fatawu", "Fatawu Issahaku", "Abdul Fatawu Issahaku"],
    "Kamaldeen Sulemana": ["Kamaldeen", "K. Sulemana", "Kamaldeen Sulemana"],
    "Ibrahim Osman": ["I. Osman", "Ibrahim Osman"],
    "Brandon Thomas-Asante": ["Thomas-Asante", "B. Thomas-Asante", "Brandon Thomas-Asante"],
    "I√±aki Williams": ["Inaki Williams", "I. Williams", "I√±aki Williams", "Williams"],
    "Joseph Paintsil": ["Paintsil", "J. Paintsil", "Joseph Paintsil"],
    "Jerry Afriyie": ["Afriyie", "J. Afriyie", "Jerry Afriyie"],
    "Christopher Bonsu Baah": ["Bonsu Baah", "C. Bonsu Baah", "Christopher Bonsu Baah"]
}

print(f"üá¨üá≠ Tracking {len(GHANA_PLAYERS)} Ghana forwards")

## 2. Load Data

In [None]:
# Load clustered data
# The CSV is read from PROCESSED_DIR; the error message below points at
# Notebook 03 as the step that produces it. A 'cluster' column is required,
# since it is read immediately after loading.
data_file = PROCESSED_DIR / "forwards_clustered.csv"

# Fail early with an actionable message instead of a bare pandas read error.
if not data_file.exists():
    raise FileNotFoundError(f"Run Notebook 03 first! Missing: {data_file}")

df = pd.read_csv(data_file)
print(f"‚úÖ Loaded {len(df)} forwards")
print(f"üìä Number of clusters: {df['cluster'].nunique()}")

In [None]:
# Load clustering model for additional info
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load a clustering_model.pkl that this project produced itself.
model_file = OUTPUT_DIR / "clustering_model.pkl"
if model_file.exists():
    with open(model_file, 'rb') as f:
        model_data = pickle.load(f)
    # Missing keys fall back to empty/None so the rest of the notebook can
    # still run with generic "Cluster N" descriptions.
    CLUSTERING_FEATURES = model_data.get('features', [])
    cluster_profiles = model_data.get('cluster_profiles')
    cluster_zscores = model_data.get('cluster_zscores')
    cluster_names = model_data.get('cluster_names', {})
    print(f"‚úÖ Loaded model with {len(CLUSTERING_FEATURES)} features")
else:
    # Fallback: rebuild the cluster profiles from the numeric per-90 columns.
    # Restricting to numeric dtypes keeps mean()/std() from failing on any
    # text column whose name happens to contain "per90".
    numeric_cols = df.select_dtypes(include='number').columns
    CLUSTERING_FEATURES = [c for c in numeric_cols if 'per90' in c.lower() and '_norm' not in c]
    cluster_profiles = df.groupby('cluster')[CLUSTERING_FEATURES].mean()
    overall_mean = df[CLUSTERING_FEATURES].mean()
    overall_std = df[CLUSTERING_FEATURES].std()
    cluster_zscores = (cluster_profiles - overall_mean) / overall_std
    # Key the names by the cluster ids actually present in the data rather than
    # assuming they are exactly 0..n-1.
    cluster_names = {i: f"Cluster {i}" for i in sorted(df['cluster'].dropna().unique())}
    print("‚ö†Ô∏è Model not found, calculated from data")

## 3. Find Ghana Players in Dataset

In [None]:
# Identify player column: the first column whose name contains "player"
# (case-insensitive). str() guards against non-string column labels.
player_col = next((col for col in df.columns if 'player' in str(col).lower()), None)

# Every later cell indexes df[player_col]; stop here with a clear message
# rather than failing later with an opaque KeyError on None.
if player_col is None:
    raise ValueError(
        f"No player-name column found in the dataset; available columns: {list(df.columns)}"
    )

print(f"üîç Player column: {player_col}")

In [None]:
# Search for each Ghana player
def find_player(df, player_col, name, aliases):
    """Find a player by canonical name or alias and return a single row.

    Matching is case-insensitive and done in two passes over ``[name] + aliases``
    (in that order):

    1. exact match on the full value of ``df[player_col]``;
    2. literal substring match (no regex, so characters such as ``.`` or ``(``
       in a name are matched as written).

    An exact hit on any spelling therefore wins over a substring hit on an
    earlier spelling. Blank spellings are ignored because an empty substring
    would match every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to search.
    player_col : str
        Name of the column holding player names.
    name : str
        Canonical player name.
    aliases : list of str
        Alternative spellings to try.

    Returns
    -------
    pandas.DataFrame
        A one-row frame holding the first matching row, or an empty frame
        when nothing matches.
    """
    # Lower-case the name column once instead of on every comparison.
    names_lower = df[player_col].astype(str).str.lower()
    candidates = [n.lower() for n in [name] + list(aliases) if n]

    # Pass 1: exact match on any spelling.
    for candidate in candidates:
        mask = names_lower == candidate
        if mask.any():
            return df[mask].iloc[[0]]  # Return only first match

    # Pass 2: literal substring match.
    for candidate in candidates:
        mask = names_lower.str.contains(candidate, regex=False, na=False)
        if mask.any():
            return df[mask].iloc[[0]]  # Return only first match

    return pd.DataFrame()  # Not found

# Find all Ghana players
# Results are collected in three parallel structures:
#   ghana_rows        - one single-row DataFrame per matched player
#   found_players     - canonical names that were matched
#   not_found_players - canonical names with no match in the dataset
ghana_rows = []
found_players = []
not_found_players = []

print("üá¨üá≠ Searching for Ghana players...\n")

for canonical_name, aliases in GHANA_PLAYERS.items():
    result = find_player(df, player_col, canonical_name, aliases)
    
    if not result.empty:
        # Copy before adding a column so the slice of df is not modified in place.
        result = result.copy()
        # Keep the canonical name next to whatever spelling the dataset uses.
        result['ghana_name'] = canonical_name
        ghana_rows.append(result)
        found_players.append(canonical_name)
        
        cluster_id = result['cluster'].iloc[0]
        print(f"  ‚úÖ {canonical_name}")
        print(f"     ‚Üí Found as: {result[player_col].iloc[0]}")
        print(f"     ‚Üí Cluster: {cluster_id}")
    else:
        not_found_players.append(canonical_name)
        print(f"  ‚ùå {canonical_name} - NOT FOUND")

# Combine all Ghana players
# ghana_df is an empty DataFrame when nobody matched; later cells check
# ghana_df.empty before using it.
if ghana_rows:
    ghana_df = pd.concat(ghana_rows, ignore_index=True)
    print(f"\nüìä Found {len(ghana_df)} Ghana player records")
else:
    ghana_df = pd.DataFrame()
    print("\n‚ùå No Ghana players found in dataset!")

## 4. Ghana Players Overview

In [None]:
# Show a compact identity table for the matched Ghana players, ordered by cluster.
if not ghana_df.empty:
    print("\n" + "="*80)
    print("üá¨üá≠ GHANA BLACK STARS ATTACKERS OVERVIEW")
    print("="*80)
    
    # Display key info
    display_cols = ['ghana_name', 'Squad', 'Pos', 'Age', 'Playing Time_90s', 'cluster', 'cluster_name']
    # Keep only the columns this dataset actually has, so a missing optional
    # column does not raise a KeyError.
    available_cols = [c for c in display_cols if c in ghana_df.columns]
    display(ghana_df[available_cols].sort_values('cluster'))

## 5. Cluster Distribution

In [None]:
# List the Ghana attackers grouped by the cluster they were assigned to
if not ghana_df.empty:
    # Player count per cluster, ordered by cluster id
    cluster_counts = ghana_df['cluster'].value_counts().sort_index()

    print("\nüìä GHANA ATTACKERS BY CLUSTER")
    print("-" * 50)

    for cluster_id, count in zip(cluster_counts.index, cluster_counts.values):
        in_cluster = ghana_df['cluster'] == cluster_id
        players = ghana_df.loc[in_cluster, 'ghana_name'].tolist()
        # Fall back to a generic label when the model supplied no name
        cluster_desc = cluster_names.get(cluster_id, f"Cluster {cluster_id}")

        print(f"\nCluster {cluster_id}: {count} player(s)")
        print(f"  Description: {cluster_desc}")
        for p in players:
            print(f"    ‚Ä¢ {p}")

In [None]:
# Visualize cluster distribution
# Grouped bar chart: for every cluster, the share of all forwards in it (grey)
# next to the share of the Ghana squad in it (green). The figure is saved to
# OUTPUT_DIR as ghana_cluster_distribution.png.
if not ghana_df.empty:
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # All clusters distribution vs Ghana
    all_counts = df['cluster'].value_counts().sort_index()
    # Align Ghana's counts to the full cluster list; clusters with no Ghana player get 0.
    ghana_counts = ghana_df['cluster'].value_counts().reindex(all_counts.index, fill_value=0)
    
    x = range(len(all_counts))
    width = 0.35
    
    # Both series are converted to percentages of their own population so the
    # two differently sized groups are comparable.
    bars1 = ax.bar([i - width/2 for i in x], all_counts.values / len(df) * 100, 
                   width, label='All Forwards (%)', alpha=0.7, color='lightgray')
    bars2 = ax.bar([i + width/2 for i in x], ghana_counts.values / max(len(ghana_df), 1) * 100, 
                   width, label='Ghana (%)', alpha=0.9, color='#006B3F')  # Ghana green
    
    ax.set_xlabel('Cluster', fontsize=12)
    ax.set_ylabel('Percentage of Players', fontsize=12)
    ax.set_title('üá¨üá≠ Cluster Distribution: Ghana vs All Forwards', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    # Tick labels use the real cluster ids, not the bar positions.
    ax.set_xticklabels([f'C{i}' for i in all_counts.index])
    ax.legend()
    ax.grid(axis='y', alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / 'ghana_cluster_distribution.png', dpi=150, bbox_inches='tight')
    plt.show()

## 6. Individual Player Profiles

In [None]:
# Key stats for comparison
# Candidate per-90 columns; any that are absent from the dataset are dropped below.
key_stats = [
    'Performance_Gls_per90', 'Performance_Ast_per90', 'Expected_xG_per90',
    'Expected_xAG_per90', 'Standard_Sh_per90', 'Standard_SoT_per90',
    'Progression_PrgC_per90', 'Progression_PrgP_per90', 'Take-Ons_Succ_per90',
    'SCA_SCA_per90', 'GCA_GCA_per90', 'KP_per90'
]

# Only use available columns
# available_stats is reused by the percentile and cluster-peer cells further down.
available_stats = [s for s in key_stats if s in df.columns]

if not ghana_df.empty:
    print("\n" + "="*80)
    print("üá¨üá≠ GHANA ATTACKERS - KEY STATISTICS")
    print("="*80)
    
    ghana_stats = ghana_df[['ghana_name', 'cluster'] + available_stats].copy()
    
    # Clean column names for display
    # Work on a second copy so ghana_stats keeps the original column names.
    display_stats = ghana_stats.copy()
    display_stats.columns = ['Player', 'Cluster'] + [c.replace('_per90', '').replace('_', ' ') for c in available_stats]
    
    # Two-decimal formatting for every stat column; the two identity columns are left as-is.
    display(display_stats.style.format({col: '{:.2f}' for col in display_stats.columns if col not in ['Player', 'Cluster']}))

In [None]:
# Percentile ranking within all forwards
# For each Ghana player, print a 10-segment text bar per stat showing the share
# of all forwards whose value is strictly below the player's value.
if not ghana_df.empty:
    print("\nüìä GHANA ATTACKERS - PERCENTILE RANKINGS (vs All Forwards)")
    print("-" * 60)
    
    for _, player_row in ghana_df.iterrows():
        player_name = player_row['ghana_name']
        cluster_id = player_row['cluster']
        
        print(f"\n{player_name} (Cluster {cluster_id})")
        print("-" * 40)
        
        for stat in available_stats[:8]:  # Top 8 key stats
            player_val = player_row[stat]
            # Strict "<": players tied with this value do not count as below it.
            # A missing (NaN) player value compares False everywhere and shows as 0%.
            percentile = (df[stat] < player_val).mean() * 100
            
            # Visual bar
            # One filled segment per full 10 percentile points, padded to 10 segments.
            bar = "‚ñà" * int(percentile / 10) + "‚ñë" * (10 - int(percentile / 10))
            
            # Trim/pad the label to 20 characters so the bars line up in a column.
            stat_name = stat.replace('_per90', '').replace('_', ' ')[:20].ljust(20)
            print(f"  {stat_name}: {bar} {percentile:5.1f}% ({player_val:.2f})")

## 7. Radar Charts for Each Player (mplsoccer)

In [None]:
# Define radar chart features (cleaner names for display)
# Candidate axes for the radar charts; filtered below to the columns present in df.
radar_stats = [
    'Performance_Gls_per90', 'Performance_Ast_per90', 'Expected_xG_per90',
    'Expected_xAG_per90', 'Take-Ons_Succ_per90', 'Progression_PrgC_per90',
    'SCA_SCA_per90', 'KP_per90'
]
radar_stats = [f for f in radar_stats if f in df.columns]

# Clean names for display
# radar_labels is built from radar_stats, so the two lists stay aligned index by index.
radar_labels = [s.replace('_per90', '').replace('_', ' ').replace('Performance ', '').replace('Expected ', '')
                for s in radar_stats]

print(f"üìä Radar chart features: {radar_labels}")

In [None]:
def get_player_percentiles(player_row, df_all, features):
    """Return the player's percentile (0-100) for each feature, in order.

    A percentile here is the share of rows in ``df_all`` whose value for the
    feature is strictly below the player's own value, multiplied by 100.

    Parameters
    ----------
    player_row : mapping-like (e.g. a pandas Series)
        The player's values, indexable by feature name.
    df_all : pandas.DataFrame
        Reference population containing every column in ``features``.
    features : list of str
        Column names to evaluate.

    Returns
    -------
    list
        One percentile per entry of ``features``.
    """
    return [(df_all[feat] < player_row[feat]).mean() * 100 for feat in features]

def create_mplsoccer_radar(player_row, df_all, features, labels, title, color='#006B3F'):
    """
    Draw a single-player percentile radar with mplsoccer.

    The player's values are converted to percentiles against ``df_all`` via
    ``get_player_percentiles`` and every axis is scaled from 0 to 100.

    Parameters: ``player_row`` is the player's stats, ``df_all`` the reference
    population, ``features`` the columns to plot, ``labels`` the axis labels
    (same order as ``features``), ``title`` the chart title and ``color`` the
    fill/title colour.

    Returns the ``(fig, ax)`` pair produced by ``Radar.setup_axis``.
    """
    n_axes = len(features)
    pct_values = get_player_percentiles(player_row, df_all, features)

    # Percentile axes: every parameter runs from 0 to 100
    radar = Radar(
        labels,
        [0] * n_axes,
        [100] * n_axes,
        round_int=[False] * n_axes,
        num_rings=4,
        ring_width=1,
        center_circle_radius=1,
    )
    fig, ax = radar.setup_axis()

    # Background rings first, then the player's polygon on top
    radar.draw_circles(ax=ax, facecolor='#e8e8e8', edgecolor='#c0c0c0')
    radar.draw_radar(
        pct_values,
        ax=ax,
        kwargs_radar=dict(facecolor=color, alpha=0.6),
        kwargs_rings=dict(facecolor=color, alpha=0.1),
    )

    # Ring values and axis names
    radar.draw_range_labels(ax=ax, fontsize=8, color='#666666')
    radar.draw_param_labels(ax=ax, fontsize=10, color='#222222')

    ax.set_title(title, fontsize=14, fontweight='bold', color=color, pad=20)

    return fig, ax

def create_matplotlib_radar(player_row, df_all, features, labels, title, color='#006B3F'):
    """
    Draw a single-player percentile radar on a plain matplotlib polar axis.

    Used when mplsoccer is not installed. Takes the same arguments as
    ``create_mplsoccer_radar`` and returns the ``(fig, ax)`` pair.
    """
    pct_values = get_player_percentiles(player_row, df_all, features)

    n_axes = len(features)
    spoke_angles = [i / float(n_axes) * 2 * pi for i in range(n_axes)]
    # Repeat the first point at the end so the outline closes on itself
    closed_angles = spoke_angles + spoke_angles[:1]
    closed_values = pct_values + pct_values[:1]

    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))

    # First axis at the top, remaining axes placed clockwise
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)
    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(labels, size=9)
    ax.set_ylim(0, 100)

    ax.plot(closed_angles, closed_values, 'o-', linewidth=2, color=color)
    ax.fill(closed_angles, closed_values, alpha=0.25, color=color)
    ax.set_title(title, size=12, color=color, y=1.08, fontweight='bold')

    return fig, ax

In [None]:
# Create radar charts for each Ghana player
# One PNG per player is written to OUTPUT_DIR as ghana_radar_<name>.png.
if not ghana_df.empty:
    # Ghana flag colours, cycled across players via the modulo below
    ghana_colors = ['#006B3F', '#FCD116', '#CE1126']

    # Pick the drawing backend once; both functions share the same signature.
    draw_radar_chart = create_mplsoccer_radar if MPLSOCCER_AVAILABLE else create_matplotlib_radar

    for idx, (_, player) in enumerate(ghana_df.iterrows()):
        player_name = player['ghana_name']
        cluster_id = player['cluster']
        title = f"{player_name} (Cluster {cluster_id})"
        color = ghana_colors[idx % len(ghana_colors)]

        fig, ax = draw_radar_chart(player, df, radar_stats, radar_labels, title, color)

        plt.tight_layout()
        # File-system friendly name: spaces become underscores and the n-tilde is transliterated.
        filename = OUTPUT_DIR / f'ghana_radar_{player_name.replace(" ", "_").replace("√±", "n")}.png'
        fig.savefig(filename, dpi=150, bbox_inches='tight', facecolor='white')
        plt.show()
        # Release the figure so one open figure per player does not pile up in memory.
        plt.close(fig)
        print(f"üíæ Saved: {filename}")

## 8. Compare Ghana Players to Cluster Peers

In [None]:
# For each Ghana player, compare to other players in same cluster
# Prints (a) the player's rank inside the cluster for the first six available
# stats and (b) the cluster's top five players by goals per 90.
if not ghana_df.empty:
    print("\n" + "="*80)
    print("üá¨üá≠ GHANA ATTACKERS vs CLUSTER PEERS")
    print("="*80)

    # 'Squad' is treated as optional elsewhere in the notebook, so do not assume it here.
    has_squad = 'Squad' in df.columns

    for _, player_row in ghana_df.iterrows():
        player_name = player_row['ghana_name']
        cluster_id = player_row['cluster']

        # Get cluster peers (the player is included in this set)
        cluster_peers = df[df['cluster'] == cluster_id]
        total = len(cluster_peers)

        print(f"\n\n{'='*60}")
        print(f"{player_name} - Cluster {cluster_id}")
        print(f"{'='*60}")
        print(f"Cluster size: {total} players")

        # Rank within cluster: 1 + number of peers with a strictly higher value
        print(f"\nRank within cluster (selected metrics):")
        for stat in available_stats[:6]:
            player_val = player_row[stat]
            stat_name = stat.replace('_per90', '').replace('_', ' ')[:25].ljust(25)
            if pd.isna(player_val):
                # A missing value compares False against every peer and would
                # otherwise be reported as rank #1.
                print(f"  {stat_name}: n/a (no data)")
                continue
            rank = (cluster_peers[stat] > player_val).sum() + 1
            print(f"  {stat_name}: #{rank:3d} / {total} ({player_val:.2f})")

        # Top 5 peers in cluster by goals
        if 'Performance_Gls_per90' in cluster_peers.columns:
            peer_cols = [player_col] + (['Squad'] if has_squad else []) + ['Performance_Gls_per90']
            top_peers = cluster_peers.nlargest(5, 'Performance_Gls_per90')[peer_cols]
            print(f"\nTop 5 in cluster by Goals per 90:")
            for i, (_, peer) in enumerate(top_peers.iterrows(), 1):
                # Star the row when it is the Ghana player under review
                is_ghana = peer[player_col] == player_row[player_col]
                marker = "‚≠ê" if is_ghana else "  "
                squad = peer['Squad'] if has_squad else 'n/a'
                print(f"{marker} {i}. {peer[player_col]} ({squad}) - {peer['Performance_Gls_per90']:.2f}")

## 9. Head-to-Head Player Comparison (mplsoccer)

In [None]:
def create_comparison_radar(player1_name, player2_name, ghana_df, df_all, features, labels):
    """
    Create, save and show a percentile radar chart comparing two players.

    Both players are looked up in ``ghana_df`` by their ``ghana_name``. Their
    values for ``features`` are converted to percentiles against ``df_all`` and
    drawn on one radar (player 1 in green, player 2 in red). mplsoccer is used
    when available, otherwise a matplotlib polar plot. The figure is written to
    ``OUTPUT_DIR`` as ``ghana_comparison_<p1>_vs_<p2>.png`` and then closed.

    Parameters
    ----------
    player1_name, player2_name : str
        Canonical names as stored in ``ghana_df['ghana_name']``.
    ghana_df : pandas.DataFrame
        Matched Ghana players.
    df_all : pandas.DataFrame
        Reference population for the percentiles.
    features : list of str
        Columns to plot.
    labels : list of str
        Axis labels, in the same order as ``features``.

    Returns
    -------
    None
        If either player is missing, a message is printed and nothing is drawn.
    """
    # Find players
    p1_data = ghana_df[ghana_df['ghana_name'] == player1_name]
    p2_data = ghana_df[ghana_df['ghana_name'] == player2_name]

    if p1_data.empty or p2_data.empty:
        print(f"‚ùå Could not find both players")
        return

    values1 = get_player_percentiles(p1_data.iloc[0], df_all, features)
    values2 = get_player_percentiles(p2_data.iloc[0], df_all, features)

    if MPLSOCCER_AVAILABLE:
        # Min and max for each parameter (percentiles: 0-100)
        low = [0] * len(features)
        high = [100] * len(features)

        # Create radar with mplsoccer
        radar = Radar(labels, low, high,
                      round_int=[False]*len(features),
                      num_rings=4,
                      ring_width=1, center_circle_radius=1)

        fig, ax = radar.setup_axis()

        radar.draw_circles(ax=ax, facecolor='#e8e8e8', edgecolor='#c0c0c0')

        # Draw both players; the first item of each result is used as the legend handle
        radar_output1 = radar.draw_radar(values1, ax=ax,
                                         kwargs_radar={'facecolor': '#006B3F', 'alpha': 0.5},
                                         kwargs_rings={'facecolor': '#006B3F', 'alpha': 0.1})
        radar_output2 = radar.draw_radar(values2, ax=ax,
                                         kwargs_radar={'facecolor': '#CE1126', 'alpha': 0.5},
                                         kwargs_rings={'facecolor': '#CE1126', 'alpha': 0.1})

        radar.draw_range_labels(ax=ax, fontsize=8, color='#666666')
        radar.draw_param_labels(ax=ax, fontsize=10, color='#222222')

        # Add legend
        ax.legend([radar_output1[0], radar_output2[0]], [player1_name, player2_name],
                  loc='upper right', fontsize=10)
    else:
        # Fallback matplotlib
        N = len(features)
        spoke_angles = [n / float(N) * 2 * pi for n in range(N)]
        # Repeat the first point so each outline closes on itself
        angles = spoke_angles + spoke_angles[:1]
        closed1 = values1 + values1[:1]
        closed2 = values2 + values2[:1]

        fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

        ax.set_theta_offset(pi / 2)
        ax.set_theta_direction(-1)
        ax.set_xticks(spoke_angles)
        ax.set_xticklabels(labels, fontsize=10)
        ax.set_ylim(0, 100)

        ax.plot(angles, closed1, 'o-', linewidth=2, label=player1_name, color='#006B3F')
        ax.fill(angles, closed1, alpha=0.25, color='#006B3F')
        ax.plot(angles, closed2, 'o-', linewidth=2, label=player2_name, color='#CE1126')
        ax.fill(angles, closed2, alpha=0.25, color='#CE1126')

        ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

    ax.set_title(f'{player1_name} vs {player2_name}\n(Percentile Rankings)', 
                 fontsize=14, fontweight='bold', pad=20)

    plt.tight_layout()
    # Same file-name clean-up as the individual radar charts: underscores for
    # spaces and a plain "n" for the n-tilde.
    slug1, slug2 = (
        n.replace(" ", "_").replace("√±", "n") for n in (player1_name, player2_name)
    )
    filename = OUTPUT_DIR / f'ghana_comparison_{slug1}_vs_{slug2}.png'
    fig.savefig(filename, dpi=150, bbox_inches='tight', facecolor='white')
    plt.show()
    # This function is called once per pair; close the figure so they do not accumulate.
    plt.close(fig)
    print(f"üíæ Saved: {filename}")

In [None]:
# Create head-to-head comparisons
# Each tuple names two players by their canonical GHANA_PLAYERS key.
if not ghana_df.empty:
    comparison_pairs = [
        ("Mohammed Kudus", "Antoine Semenyo"),
        ("Fatawu Issahaku", "Kamaldeen Sulemana"),
        ("I√±aki Williams", "Jordan Ayew")
    ]
    
    for p1, p2 in comparison_pairs:
        # A pair is skipped without any message unless both players were matched in the dataset.
        if p1 in found_players and p2 in found_players:
            print(f"\nüìä Creating comparison: {p1} vs {p2}")
            create_comparison_radar(p1, p2, ghana_df, df, radar_stats, radar_labels)

## 10. Gap Analysis - Squad Composition

In [None]:
# Squad gap analysis: which cluster types Ghana lacks and which it has in bulk
if not ghana_df.empty:
    print("\n" + "="*80)
    print("üîç SQUAD GAP ANALYSIS")
    print("="*80)

    # Cluster ids seen in the full dataset vs. in the Ghana squad
    all_clusters = set(df['cluster'].unique())
    ghana_clusters = set(ghana_df['cluster'].unique())

    # Clusters with no Ghana player at all
    missing_clusters = all_clusters.difference(ghana_clusters)

    print(f"\nüìä Ghana has players in {len(ghana_clusters)} of {len(all_clusters)} cluster types")

    if len(missing_clusters) > 0:
        print("\n‚ùå MISSING PLAYER TYPES (Gap in squad):")
        for c in sorted(missing_clusters):
            print(f"   ‚Ä¢ Cluster {c}: {cluster_names.get(c, 'Unknown')}")

    # Clusters holding three or more Ghana players
    cluster_counts = ghana_df['cluster'].value_counts()
    crowded = cluster_counts >= 3
    overrepresented = cluster_counts[crowded].index.tolist()

    if len(overrepresented) > 0:
        print("\n‚ö†Ô∏è OVER-REPRESENTED PLAYER TYPES (Consider variety):")
        for c in overrepresented:
            count = cluster_counts[c]
            same_cluster = ghana_df['cluster'] == c
            players = ghana_df.loc[same_cluster, 'ghana_name'].tolist()
            print(f"   ‚Ä¢ Cluster {c}: {count} players - {', '.join(players)}")

## 11. PCA Visualization with Ghana Highlighted

In [None]:
# Scatter plot with Ghana highlighted
# All forwards are drawn as faint dots coloured by cluster; Ghana players are
# overlaid as labelled stars. The figure is saved as ghana_pca_visualization.png.
has_pca = 'pca_1' in df.columns and 'pca_2' in df.columns

if not has_pca:
    print("‚ö†Ô∏è PCA columns not found - run Notebook 03 first")
elif ghana_df.empty:
    # Separate message: here the PCA columns exist, but there is nobody to highlight.
    print("‚ö†Ô∏è No Ghana players found - skipping PCA visualization")
else:
    fig, ax = plt.subplots(figsize=(14, 10))

    # Plot all players by cluster
    # Iterate over the cluster ids actually present instead of assuming 0..n-1,
    # so non-contiguous or non-zero-based ids are still drawn.
    cluster_ids = sorted(df['cluster'].dropna().unique())
    n_clusters = len(cluster_ids)
    colors = plt.cm.tab20(np.linspace(0, 1, n_clusters))

    for cluster_color, cluster_id in zip(colors, cluster_ids):
        mask = df['cluster'] == cluster_id
        ax.scatter(df.loc[mask, 'pca_1'], df.loc[mask, 'pca_2'],
                   c=[cluster_color], label=f'Cluster {cluster_id}',
                   alpha=0.3, s=30)

    # Highlight Ghana players with star (zorder keeps them above the dots)
    ax.scatter(ghana_df['pca_1'], ghana_df['pca_2'],
               c='#006B3F', s=200, marker='*', edgecolors='#FCD116',
               linewidths=2, label='Ghana', zorder=5)

    # Add labels for Ghana players
    for _, player in ghana_df.iterrows():
        ax.annotate(player['ghana_name'],
                    (player['pca_1'], player['pca_2']),
                    xytext=(10, 5), textcoords='offset points',
                    fontsize=9, fontweight='bold', color='#006B3F',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))

    ax.set_xlabel('PC1', fontsize=12)
    ax.set_ylabel('PC2', fontsize=12)
    ax.set_title('üá¨üá≠ PCA Visualization - Ghana Black Stars Highlighted',
                 fontsize=14, fontweight='bold')
    ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(OUTPUT_DIR / 'ghana_pca_visualization.png', dpi=150, bbox_inches='tight')
    plt.show()

## 12. Summary

In [None]:
# Text summary: headline counts, unmatched players, then the squad by cluster
if not ghana_df.empty:
    ghana_clusters = ghana_df['cluster'].unique()
    rule = "=" * 80

    print("\n" + rule)
    print("üá¨üá≠ GHANA BLACK STARS CLUSTER ANALYSIS SUMMARY")
    print(rule)

    # Headline numbers
    print(f"\nüìä Total Ghana attackers analyzed: {len(ghana_df)}")
    print(f"üìä Players found: {len(found_players)}/{len(GHANA_PLAYERS)}")
    print(f"üìä Spread across {len(ghana_clusters)} different clusters")

    # Only mention missing players when there are any
    if len(not_found_players) > 0:
        print(f"\n‚ùå Players not found in data: {', '.join(not_found_players)}")

    print("\nüîµ PLAYERS BY CLUSTER TYPE:")
    for cluster_id in sorted(ghana_clusters):
        in_cluster = ghana_df['cluster'] == cluster_id
        cluster_players = ghana_df[in_cluster]
        players = cluster_players['ghana_name'].tolist()
        # Generic label when the model carries no name for this cluster
        desc = cluster_names.get(cluster_id, f"Cluster {cluster_id}")

        print(f"\nCluster {cluster_id}:")
        print(f"  Profile: {desc}")
        print(f"  Players: {', '.join(players)}")

    print("\n" + rule)
    print("‚úÖ Analysis complete! Check the outputs folder for saved visualizations.")

In [None]:
# Save Ghana analysis data
# Writes every column of the matched rows, plus the added 'ghana_name' column,
# to OUTPUT_DIR/ghana_analysis.csv. Nothing is written when no player was found.
if not ghana_df.empty:
    output_file = OUTPUT_DIR / "ghana_analysis.csv"
    ghana_df.to_csv(output_file, index=False)
    print(f"üíæ Saved Ghana analysis: {output_file}")

---
## ✅ Done!

### Deliverables Created:
1. `ghana_radar_*.png` - Individual radar charts for each player (mplsoccer, with a matplotlib fallback)
2. `ghana_comparison_*.png` - Head-to-head comparison charts
3. `ghana_pca_visualization.png` - PCA scatter with Ghana highlighted
4. `ghana_cluster_distribution.png` - Cluster distribution comparison
5. `ghana_analysis.csv` - Full player data with clusters