In [None]:
import pandas as pd 
import os
df = pd.read_csv('fpl-data-stats.csv')
df.describe()

# 1Ô∏è‚É£ Data Loading & Overview

In [None]:
# Dataset Overview and Structure
print("=== DATASET OVERVIEW ===")
print(f"Dataset Shape: {df.shape}")
print(f"Total Records: {df.shape[0]:,}")
print(f"Total Features: {df.shape[1]}")
print("\n=== COLUMN NAMES ===")
print(df.columns.tolist())

print("\n=== DATA TYPES ===")
print(df.dtypes)

print("\n=== BASIC INFO ===")
df.info()

In [None]:
# Missing Values Analysis
print("=== MISSING VALUES ANALYSIS ===")
missing_values = df.isnull().sum()
missing_percentage = (missing_values / len(df)) * 100

missing_df = pd.DataFrame({
    'Column': missing_values.index,
    'Missing Count': missing_values.values,
    'Missing Percentage': missing_percentage.values
}).sort_values('Missing Count', ascending=False)

# Display only columns with missing values
if missing_df['Missing Count'].sum() > 0:
    print(missing_df[missing_df['Missing Count'] > 0])
else:
    print("No missing values found in the dataset!")

print(f"\nTotal missing values in dataset: {missing_values.sum():,}")
print(f"Percentage of complete records: {((len(df) - missing_values.sum()) / len(df)) * 100:.2f}%")

df = df.drop(columns=['penalty_area_touches', 'touches'])

# 2Ô∏è‚É£ Data Cleaning & Processing

In [None]:
# Separate Numerical and Categorical Variables
import numpy as np

# Identify numerical and categorical columns
numerical_cols = df.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

print("=== VARIABLE TYPES ===")
print(f"Numerical variables ({len(numerical_cols)}): {numerical_cols}")
print(f"\nCategorical variables ({len(categorical_cols)}): {categorical_cols}")

# For categorical variables, show unique values
# For categorical variables, show unique values
print("\n=== CATEGORICAL VARIABLES ANALYSIS ===")
for col in categorical_cols[:10]:  # Show first 10 categorical columns
    unique_count = df[col].nunique()
    print(f"\n{col}:")
    print(f"  - Unique values: {unique_count}")
    if unique_count <= 20:  # Show values if not too many
        # Convert all values to string for sorting (to avoid errors)
        print(f"  - Values: {sorted(df[col].astype(str).unique())}")
    else:
        print(f"  - Top 10 values: {df[col].value_counts().head(10).index.tolist()}")


In [None]:
# Filter useful numerical variables for FPL analysis
print("=== FILTERING USEFUL NUMERICAL VARIABLES ===")

# Define categories of useful variables
core_performance = ['total_points', 'minutes', 'now_cost', 'selected_by_percent']
attacking_metrics = ['G', 'A', 'xG', 'xA', 'shots', 'SoT', 'key_passes']
expected_metrics = ['xG', 'xA', 'xGI', 'npxG', 'npxGI', 'xP']
defensive_metrics = ['CS', 'xCS', 'GC', 'xGC', 'tackles', 'recoveries', 
                    'clearances_blocks_interceptions', 'defensive_contribution']
advanced_metrics = ['PvsxP', 'carries_final_third', 'carries_penalty_area']

# Combine into useful variables list
useful_numerical_vars = list(set(core_performance + attacking_metrics + 
                                expected_metrics + defensive_metrics + advanced_metrics))

# Filter only variables that exist in the dataset
useful_vars_available = [var for var in useful_numerical_vars if var in numerical_cols]

print(f"Original numerical variables: {len(numerical_cols)}")
print(f"Useful numerical variables: {len(useful_vars_available)}")
print(f"Variables removed: {len(numerical_cols) - len(useful_vars_available)}")

print(f"\n=== USEFUL VARIABLES BY CATEGORY ===")
print(f"Core Performance: {[v for v in core_performance if v in useful_vars_available]}")
print(f"Attacking Metrics: {[v for v in attacking_metrics if v in useful_vars_available]}")
print(f"Expected Stats: {[v for v in expected_metrics if v in useful_vars_available]}")
print(f"Defensive Metrics: {[v for v in defensive_metrics if v in useful_vars_available]}")
print(f"Advanced Metrics: {[v for v in advanced_metrics if v in useful_vars_available]}")

# Variables to exclude (less useful for FPL analysis)
excluded_vars = [var for var in numerical_cols if var not in useful_vars_available]
print(f"\n=== EXCLUDED VARIABLES ===")
print(f"Less useful for FPL: {excluded_vars}")

# Create filtered dataset with useful variables only
useful_numerical_df = df[useful_vars_available].copy()
print(f"\n=== FILTERED DATASET INFO ===")
print(f"Shape: {useful_numerical_df.shape}")
print(f"Useful numerical variables: {useful_vars_available}")

In [None]:
import pandas as pd
import warnings

# Define team short name mapping
team_short_names = {
    'Liverpool': 'LIV',
    'Man City': 'MCI',
    'Man Utd': 'MUN',
    'Chelsea': 'CHE',
    'Crystal Palace': 'CRY',
    'Bournemouth': 'BOU',
    'Spurs': 'TOT',
    'Everton': 'EVE',
    "Nott'm Forest": 'NFO',
    'Brighton': 'BHA',
    'Newcastle': 'NEW',
    'West Ham': 'WHU',
    'Sunderland': 'SUN',
    'Fulham': 'FUL',
    'Leeds': 'LEE',
    'Aston Villa': 'AVL',
    'Brentford': 'BRE',
    'Wolves': 'WOL',
    'Burnley': 'BUR'
}

# üîß FIXED: Standardized to 5 gameweeks to match team rankings calculation
FORM_GAMEWEEKS = 5  # Changed from 3 to 5 for consistency

def add_team_short_names(season_data: pd.DataFrame) -> pd.DataFrame:
    """
    Add team_name_short column to season_data based on team_name mapping.
    
    Args:
        season_data: DataFrame containing player season statistics
    Returns:
        Updated DataFrame with team_name_short column
    """
    # Create a copy to avoid modifying the original
    season_data = season_data.copy()
    
    # Print unique team names for diagnostics
    unique_teams = season_data['team_name'].unique()
    print("Unique team names in dataframe:", unique_teams)
    
    # Normalize team names for mapping (case-insensitive, strip spaces/punctuation)
    normalized_mapping = {k.lower().replace("'", "").strip(): v for k, v in team_short_names.items()}
    
    # Add team_name_short column with normalized matching
    def map_team_name(team_name):
        if pd.isna(team_name):
            return None
        normalized_name = team_name.lower().replace("'", "").strip()
        return normalized_mapping.get(normalized_name, team_name[:3].upper())  # Default to first 3 letters if unmapped
    
    season_data['team_name_short'] = season_data['team_name'].apply(map_team_name)
    
    # Check for unmapped team names (NaN or defaulted to first 3 letters)
    unmapped_teams = season_data[
        season_data['team_name_short'].isna() | 
        ~season_data['team_name'].str.lower().replace("'", "").str.strip().isin(normalized_mapping.keys())
    ]['team_name'].unique()
    if len(unmapped_teams) > 0:
        warnings.warn(f"Unmapped team names (assigned default short names): {unmapped_teams}. Consider updating the team_short_names mapping.")
    
    return season_data


# Apply the mapping
df = add_team_short_names(df)

In [None]:
# Display the first 20 rows of the dataset
print("=== TOP 20 ROWS OF DATASET ===")
print(df.head(20))


In [None]:
# Outlier Detection and Analysis
print("=== OUTLIER DETECTION ===")

def detect_outliers_iqr(df, column):
    """Detect outliers using IQR method"""
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    
    outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
    return outliers, lower_bound, upper_bound

# Analyze outliers for key metrics
key_metrics = ['total_points', 'now_cost', 'selected_by_percent', 'minutes']

for metric in key_metrics:
    if metric in df.columns and df[metric].notna().sum() > 0:
        outliers, lower, upper = detect_outliers_iqr(df, metric)
        print(f"\n{metric.upper()}:")
        print(f"  Normal range: {lower:.2f} to {upper:.2f}")
        print(f"  Number of outliers: {len(outliers)}")
        print(f"  Percentage of outliers: {(len(outliers) / len(df)) * 100:.2f}%")
        
        if len(outliers) > 0 and len(outliers) <= 10:
            print("  Top outliers:")
            top_outliers = outliers.nlargest(10, metric)[['web_name', 'team_name', metric]]
            for _, player in top_outliers.iterrows():
                print(f"    {player['web_name']} ({player['team_name']}): {player[metric]}")


In [None]:
"""
DATA QUALITY FILTER #1: Remove players with 0 minutes

PROBLEM: Some players are assigned to teams they never actually played for.
- They show up in the data with 0 minutes across all gameweeks
- Their CS/GC stats contaminate team defensive rankings
- Example: Villa had 6 such players (Hollingshead, Proctor, Rowe, etc.) bringing their 
  defense ranking from actual #16 to calculated #20

SOLUTION: Filter out any player-team combination where total minutes = 0
"""
print("=" * 60)
print("FILTERING OUT PLAYERS WITH 0 MINUTES")
print("=" * 60)

# Calculate total minutes played per player-team combination
minutes_by_player_team = df.groupby(['id', 'team_name', 'web_name'])['minutes'].sum().reset_index()

# Identify player-team combinations with 0 minutes
zero_minute_combos = minutes_by_player_team[minutes_by_player_team['minutes'] == 0]

print(f"\nFound {len(zero_minute_combos)} player-team combinations with 0 minutes:")
print(f"  - These players never actually played for the team they're assigned to")
print(f"  - They contaminate team statistics (CS, GC, xCS, xGC, etc.)")

# Show examples
if len(zero_minute_combos) > 0:
    print("\nExamples of 0-minute players:")
    for idx, row in zero_minute_combos.head(10).iterrows():
        player_games = len(df[(df['id'] == row['id']) & (df['team_name'] == row['team_name'])])
        print(f"  - {row['web_name']} ({row['team_name']}): {player_games} games, 0 minutes")

# Filter out rows where player has 0 minutes for that team
print(f"\nOriginal dataset size: {len(df):,} rows")

# Create a set of valid player-team combinations (those with minutes > 0)
valid_combos = set(
    zip(
        minutes_by_player_team[minutes_by_player_team['minutes'] > 0]['id'],
        minutes_by_player_team[minutes_by_player_team['minutes'] > 0]['team_name']
    )
)

# Filter df to keep only valid combinations
df = df[df.apply(lambda row: (row['id'], row['team_name']) in valid_combos, axis=1)].copy()

print(f"After filtering: {len(df):,} rows")
print(f"Removed: {14175 - len(df):,} rows with 0 minutes")
print(f"\n‚úÖ Dataset now contains only players who actually played for their teams")
print("=" * 60)

In [None]:
"""
DATA QUALITY FILTER #2: Create unique player-team tracking key

PROBLEM: Some players share the same display name (web_name):
- Barnes at Burnley vs Barnes at Newcastle (different players)
- Martinez at Villa vs Martinez at Man Utd (different players)
- 7 total web_names shared by different players

SOLUTION: Use player 'id' (not web_name) for unique identification
"""
print("="*60)
print("CREATING PLAYER-TEAM TRACKING KEY")
print("="*60)

# Add a composite key using player ID and team name
df['player_team_key'] = df['id'].astype(str) + '|' + df['team_name']

# Count unique player-team combinations
unique_player_teams = df['player_team_key'].nunique()
unique_players = df['id'].nunique()

print(f"\nüìä Dataset now tracks:")
print(f"   {unique_players:,} unique players (by ID)")
print(f"   {unique_player_teams:,} unique player-team combinations")
print(f"   Difference: {unique_player_teams - unique_players} additional combinations due to transfers")

print("\n‚úÖ Using player ID avoids false duplicates from players with same display name")
print("=" * 60)

In [None]:
"""
DATA QUALITY FILTER #3: Detect player transfers between teams

PROBLEM: CS (Clean Sheets) and GC (Goals Conceded) are TEAM-level cumulative stats.
When a player transfers mid-season, we need to track their stats separately per team.

SOLUTION: Identify transferred players and use player_team_key for aggregation
"""
print("="*60)
print("DETECTING PLAYER TRANSFERS")
print("="*60)

# Find players (by ID) who have played for multiple teams
player_teams = df.groupby('id')['team_name'].nunique()
transferred_players = player_teams[player_teams > 1].sort_values(ascending=False)

print(f"\nüîç Found {len(transferred_players)} players who played for multiple teams:")
if len(transferred_players) > 0:
    print("\nPlayers with most team changes:")
    for player_id in transferred_players.head(10).index:
        player_name = df[df['id'] == player_id]['web_name'].iloc[0]
        teams = df[df['id'] == player_id]['team_name'].unique()
        games_per_team = df[df['id'] == player_id].groupby('team_name')['gameweek'].count()
        print(f"  {player_name} (ID: {player_id}): {list(teams)}")
        for team in teams:
            games = games_per_team[team]
            print(f"    - {team}: {games} games")

# Create a flag for transferred players
df['is_transferred_player'] = df['id'].isin(transferred_players.index)

print(f"\n‚úÖ Added 'is_transferred_player' flag to dataset")
print(f"   {df['is_transferred_player'].sum():,} rows involve transferred players")
print(f"   {len(transferred_players)} unique transferred players")

# Store the list for later validation
transferred_player_names = set(transferred_players.index)
print("=" * 60)

# 3Ô∏è‚É£ Exploratory Data Analysis

In [None]:
# Positional and Team Analysis
print("=== POSITIONAL ANALYSIS ===")

# Position mapping
position_map = {1: 'Goalkeeper', 2: 'Defender', 3: 'Midfielder', 4: 'Forward'}
df['position_name'] = df['element_type'].map(position_map)

# Analysis by position
position_stats = df.groupby('position_name').agg({
    'total_points': ['count', 'mean', 'median', 'max'],
    'now_cost': ['mean', 'median'],
    'minutes': ['mean'],
    'selected_by_percent': ['mean'],
    'G': ['mean'],
    'A': ['mean']
}).round(2)

print("Position Statistics:")
print(position_stats)

print("\n=== TEAM ANALYSIS ===")

# Team performance analysis
team_stats = df.groupby('team_name').agg({
    'total_points': ['count', 'sum', 'mean'],
    'now_cost': ['mean'],
    'selected_by_percent': ['mean'],
    'G': ['sum'],
    'A': ['sum'],
    'minutes': ['sum']
}).round(2)

team_stats.columns = ['_'.join(col) for col in team_stats.columns]
team_stats = team_stats.sort_values('total_points_sum', ascending=False)

print("\nTop 10 Teams by Total Points:")
print(team_stats.head(10)[['total_points_sum', 'total_points_mean', 'now_cost_mean']])

print("\n=== VALUE ANALYSIS BY POSITION ===")
# Calculate points per million by position
df['points_per_million'] = df['total_points'] / df['now_cost']

value_by_position = df[df['total_points'] > 0].groupby('position_name')['points_per_million'].agg([
    'count', 'mean', 'median', 'max'
]).round(2)

print(value_by_position)



# 5Ô∏è‚É£ Player Performance Analysis

In [None]:
# Aggregate gameweek data to create season statistics for each player
print("="*60)
print("CREATING SEASON STATISTICS FROM GAMEWEEK DATA")
print("="*60)

# üîß CRITICAL FIX: Group by player_team_key to handle transfers properly
# This ensures CS/GC stats are NOT mixed across teams for transferred players
print("\n‚ö†Ô∏è  Using player_team_key to prevent cross-team stat contamination")
print("   (transferred players will have separate entries per team)\n")

# Group by player-team combination (id) and aggregate all gameweek data
season_stats = df.groupby(['id', 'web_name', 'team_name', 'team_name_short', 'position_name', 'element_type', 'player_team_key']).agg({
    # Core stats
    'total_points': 'sum',
    'now_cost': 'last',
    'selected_by_percent': 'last',
    'minutes': 'sum',
    'gameweek': ['max', 'count'],
    
    # Attacking stats
    'G': 'sum',
    'A': 'sum',
    'xG': 'sum',
    'xA': 'sum',
    'xGI': 'sum',
    'shots': 'sum',
    'SoT': 'sum',
    'SiB': 'sum',
    'key_passes': 'sum',
    
    # Defensive stats
    'CS': 'sum',
    'GC': 'sum',
    'xCS': 'sum',
    'xGC': 'sum',
    'tackles': 'sum',
    'recoveries': 'sum',
    'clearances_blocks_interceptions': 'sum',
    'defensive_contribution': 'sum'
}).reset_index()

# Flatten multi-level columns
season_stats.columns = ['_'.join(col).strip('_') if col[1] else col[0] for col in season_stats.columns]

# Rename aggregated columns to clean names
season_stats.rename(columns={
    'now_cost_last': 'now_cost',
    'selected_by_percent_last': 'selected_by_percent',
    'gameweek_max': 'last_gameweek',
    'gameweek_count': 'games_played',
    'total_points_sum': 'season_points',
    'minutes_sum': 'season_minutes',
    'G_sum': 'season_goals',
    'A_sum': 'season_assists',
    'xG_sum': 'season_xG',
    'xA_sum': 'season_xA',
    'xGI_sum': 'season_xGI',
    'shots_sum': 'season_shots',
    'SoT_sum': 'season_SoT',
    'SiB_sum': 'season_SiB',
    'key_passes_sum': 'season_key_passes',
    'CS_sum': 'season_CS',
    'GC_sum': 'season_GC',
    'xCS_sum': 'season_xCS',
    'xGC_sum': 'season_xGC',
    'tackles_sum': 'season_tackles',
    'recoveries_sum': 'season_recoveries',
    'clearances_blocks_interceptions_sum': 'clearances_blocks_interceptions_sum',
    'defensive_contribution_sum': 'defensive_contribution_sum'
}, inplace=True)

# Calculate per-game metrics
season_stats['points_per_game'] = season_stats['season_points'] / season_stats['games_played']
season_stats['goals_per_game'] = season_stats['season_goals'] / season_stats['games_played']
season_stats['assists_per_game'] = season_stats['season_assists'] / season_stats['games_played']
season_stats['minutes_per_game'] = season_stats['season_minutes'] / season_stats['games_played']

# Calculate points per million (needed for hidden gems)
season_stats['points_per_million'] = season_stats['season_points'] / season_stats['now_cost']

# Round numerical columns
numeric_cols = season_stats.select_dtypes(include=[np.number]).columns
season_stats[numeric_cols] = season_stats[numeric_cols].round(2)

# üîß CRITICAL FIX: For transferred players, keep ONLY their most recent team
# This prevents contamination in team-level analysis while preserving all data
original_count = len(season_stats)
season_stats = season_stats.loc[season_stats.groupby('web_name')['games_played'].idxmax()]

print(f"Created season stats for {len(season_stats)} unique players")
print(f"  - Original entries (with transfers): {original_count}")
print(f"  - Removed {original_count - len(season_stats)} duplicate transfer entries")
print(f"  - For transferred players: kept most recent team only")
print(f"Data covers gameweeks 1-{df['gameweek'].max()}")

# Verify fix worked
if 'transferred_player_names' in dir() and len(transferred_player_names) > 0:
    transferred_in_stats = season_stats[season_stats['web_name'].isin(transferred_player_names)]
    print(f"\n‚úÖ Transferred players in season_stats: {len(transferred_in_stats)} (should equal {len(transferred_player_names)})")
    if len(transferred_in_stats) == len(transferred_player_names):
        print(f"   SUCCESS: Each transferred player has exactly ONE entry!")
    
season_stats.head(4)

In [None]:
# Calculate the required metrics
num_players = season_stats['web_name'].nunique()
total_teams = season_stats['team_name'].nunique()
total_gameweeks = season_stats['last_gameweek'].max()

# Create a summary DataFrame
layout_df = pd.DataFrame({
    'number_of_players': [num_players],
    'total_teams': [total_teams],
    'total_gameweeks': [total_gameweeks]
})

print("Layout Data:")
print(layout_df)
# Export to JSON file
layout_df.to_json('backend/data/layout.json', orient='records', indent=4)

print("Layout data exported to backend/data/layout.json")

In [None]:
# Top Performers
print("üèÜ === FPL KEY INSIGHTS & RECOMMENDATIONS ===")

# üîß FIXED: Calculate form for all players using last 5 gameweeks (was 3, now matches team rankings)
def calculate_player_form(player_name, team_name):
    """Calculate form as points per game from recent performances"""
    player_games = df[(df['web_name'] == player_name) & (df['team_name'] == team_name)]
    if len(player_games) == 0:
        print(f"Warning: No data for {player_name} ({team_name})")
        return None  # Changed from 5.0 to None for missing data
    
    # Get last 5 gameweeks (matches team rankings calculation)
    recent_games = player_games.nlargest(5, 'gameweek')
    if len(recent_games) == 0:
        print(f"Warning: No recent games for {player_name} ({team_name})")
        return None  # Changed from 5.0 to None
    
    avg_points = recent_games['total_points'].mean()
    element_type = player_games['element_type'].iloc[0] if 'element_type' in player_games else 3  # Default to MID if missing
    
    if element_type == 1:  # Goalkeeper
        form_score = min(10.0, max(0.0, avg_points * 1.2))
    elif element_type == 2:  # Defender
        form_score = min(10.0, max(0.0, avg_points * 1.1))
    else:  # Midfielder or Forward
        form_score = min(10.0, max(0.0, avg_points * 0.9))
    
    return round(form_score, 1)

# Add form to season_stats
season_stats['form'] = season_stats.apply(
    lambda row: calculate_player_form(row['web_name'], row['team_name']),
    axis=1
)

# Fill missing form with median to avoid NaN issues
if season_stats['form'].isna().any():
    median_form = season_stats['form'].median()
    season_stats['form'].fillna(median_form, inplace=True)
    print(f"‚ö†Ô∏è Filled {season_stats['form'].isna().sum()} missing form values with median: {median_form}")


# Now proceed with insights calculations
insights_data = {}

# 1. SEASON PERFORMERS - Top 15 by total points
print(f"\nüèÜ TOP SEASON PERFORMERS")
print("-" * 50)

top_scorers = season_stats.nlargest(15, 'season_points')
top_scorer_names = set(top_scorers['web_name'].values)

season_performers_data = []
for i, (_, player) in enumerate(top_scorers.iterrows(), 1):
    ppg = player['season_points'] / player['games_played'] if player['games_played'] > 0 else 0
    
    player_data = {
        "player": player['web_name'],
        "team": player['team_name'],
        "team_short": player['team_name_short'],
        "position": player['position_name'],
        "points": int(player['season_points']),
        "ppg": round(ppg, 1),
        "price": player['now_cost'],
        "ownership": player['selected_by_percent'],
        "form": player['form']
    }
    season_performers_data.append(player_data)
    
    print(f"{i}. {player['web_name']} ({player['position_name']}, {player['team_name']} [{player['team_name_short']}])")
    print(f"   {player['season_points']:.0f} pts ({ppg:.1f} ppg) | ¬£{player['now_cost']}m | {player['selected_by_percent']:.1f}% owned | Form: {player['form']}")

insights_data['season_performers'] = season_performers_data

# 2. VALUE PLAYERS - Best points per million (excluding top performers)
print(f"\nüí∞ BEST VALUE PLAYERS (Points per Million)")
print("-" * 50)

value_candidates = season_stats[
    (~season_stats['web_name'].isin(top_scorer_names)) & 
    (season_stats['season_points'] >= 15)
].copy()

value_candidates['points_per_million'] = value_candidates['season_points'] / value_candidates['now_cost']
value_players = value_candidates.nlargest(10, 'points_per_million')

value_players_data = []
for i, (_, player) in enumerate(value_players.iterrows(), 1):
    player_data = {
        "player": player['web_name'],
        "team": player['team_name'],
        "team_short": player['team_name_short'],
        "position": player['position_name'],
        "pointsPerMillion": round(player['points_per_million'], 2),
        "totalPoints": int(player['season_points']),
        "price": player['now_cost'],
        "form": player['form']
    }
    value_players_data.append(player_data)
    
    print(f"{i}. {player['web_name']} ({player['position_name']}, {player['team_name']} [{player['team_name_short']}])")
    print(f"   {player['points_per_million']:.2f} pts/¬£m | {player['season_points']:.0f} pts | ¬£{player['now_cost']}m | Form: {player['form']}")

insights_data['value_players'] = value_players_data

# 3. HIDDEN GEMS - Low ownership with strong underlying stats
print(f"\nüíé HIDDEN GEMS (Low Ownership + Strong Potential)")
print("-" * 50)

# Compute dynamic thresholds based on averages
avg_points = season_stats['season_points'].mean()
avg_form = season_stats['form'].mean()
min_games = 4  # Minimum games played, reasonable for Gameweek 6
min_xG = season_stats['season_xG'].mean() * 0.8  # 80% of average xG for attacking threat

print(f"Dynamic thresholds: Avg Points = {avg_points:.2f}, Avg Form = {avg_form:.2f}, Min xG = {min_xG:.2f}")

# Filter hidden gems using dynamic thresholds
hidden_gems = season_stats[
    (season_stats['season_points'] >= avg_points * 0.8) &  # 80% of average points
    (season_stats['selected_by_percent'] < 8) &
    (season_stats['selected_by_percent'] > 0) &
    (season_stats['games_played'] >= min_games) &
    (season_stats['season_xG'] >= min_xG) &  # Dynamic xG threshold
    (season_stats['form'] >= avg_form * 0.8) &  # 80% of average form
    (~season_stats['web_name'].isin(top_scorer_names))
]

# Replace position names
hidden_gems['position_name'] = hidden_gems['position_name'].replace({
    'Forward': 'FWD',
    'Midfielder': 'MID',
    'Defender': 'DEF',
    'Goalkeeper': 'GK'
})

hidden_gems_data = []

if len(hidden_gems) > 0:
    hidden_gems = hidden_gems.copy()

    # Define metrics for z-score calculation
    metrics = [
        'season_xG', 'season_xA', 'season_xCS', 'season_key_passes',
        'form', 'points_per_game', 'goals_per_game', 'assists_per_game',
        'points_per_million', 'minutes_per_game',
        'season_tackles', 'season_recoveries', 'defensive_contribution_sum'
    ]

    # Calculate z-scores for each metric
    for metric in metrics:
        if metric in hidden_gems.columns:
            mean = hidden_gems[metric].mean()
            std = hidden_gems[metric].std()
            if std > 0:
                hidden_gems[f'{metric}_z'] = (hidden_gems[metric] - mean) / std
            else:
                hidden_gems[f'{metric}_z'] = 0

    # Calculate per-90 metrics (normalize by minutes played)
    hidden_gems['xG_per_game'] = hidden_gems['season_xG'] / (hidden_gems['season_minutes'] / 90)
    hidden_gems['xA_per_game'] = hidden_gems['season_xA'] / (hidden_gems['season_minutes'] / 90)

    # Calculate potential score based on position
    for idx, player in hidden_gems.iterrows():
        position = player['position_name']
        
        # Position-specific potential scores
        if position == 'FWD':
            potential_score = (
                player.get('season_xG_z', 0) * 0.30 +
                player.get('season_xA_z', 0) * 0.20 +
                player.get('form_z', 0) * 0.35 +
                player.get('points_per_game_z', 0) * 0.15 +
                player.get('points_per_million_z', 0) * 0.10
            )
        elif position == 'MID':
            potential_score = (
                player.get('season_xG_z', 0) * 0.25 +
                player.get('season_xA_z', 0) * 0.25 +
                player.get('form_z', 0) * 0.25 +
                player.get('season_key_passes_z', 0) * 0.15 +
                player.get('points_per_million_z', 0) * 0.10
            )
        elif position == 'DEF':
            potential_score = (
                player.get('season_xCS_z', 0) * 0.30 +
                player.get('season_xA_z', 0) * 0.15 +
                player.get('form_z', 0) * 0.25 +
                player.get('defensive_contribution_sum_z', 0) * 0.20 +
                player.get('points_per_million_z', 0) * 0.10
            )
        elif position == 'GK':
            potential_score = (
                player.get('season_xCS_z', 0) * 0.35 +
                player.get('form_z', 0) * 0.25 +
                player.get('points_per_game_z', 0) * 0.20 +
                player.get('points_per_million_z', 0) * 0.20
            )
        else:
            potential_score = 0

        hidden_gems.loc[idx, 'potential_score'] = potential_score

    # Normalize potential score to 0-10 scale
    if hidden_gems['potential_score'].std() > 0:
        min_score = hidden_gems['potential_score'].min()
        max_score = hidden_gems['potential_score'].max()
        hidden_gems['potential_score'] = ((hidden_gems['potential_score'] - min_score) / (max_score - min_score)) * 10
    
    # Sort by potential score
    hidden_gems_sorted = hidden_gems.nlargest(10, 'potential_score')
    
    for i, (_, player) in enumerate(hidden_gems_sorted.iterrows(), 1):
        player_data = {
            "player": player['web_name'],
            "team": player['team_name'],
            "team_short": player['team_name_short'],
            "position": player['position_name'],
            "points": int(player['season_points']),
            "ppg": round(player['points_per_game'], 2),
            "xG": round(player['season_xG'], 1),
            "xA": round(player['season_xA'], 1),
            "ownership": player['selected_by_percent'],
            "price": player['now_cost'],
            "form": player['form'],
            "potentialScore": round(player['potential_score'], 1)
        }
        hidden_gems_data.append(player_data)
        
        print(f"{i}. {player['web_name']} ({player['position_name']}, {player['team_name']} [{player['team_name_short']}])")
        print(f"   {player['season_points']:.0f} pts ({player['points_per_game']:.2f} ppg) | xG: {player['season_xG']:.1f}, xA: {player['season_xA']:.1f} | {player['selected_by_percent']:.1f}% owned | ¬£{player['now_cost']}m")
        print(f"   Potential Score: {player['potential_score']:.1f}/10 | Form: {player['form']}")
else:
    print("No hidden gems found with current thresholds")

insights_data['hidden_gems'] = hidden_gems_data

# 4. GOAL LEADERS - Top goal scorers
print(f"\n‚öΩ TOP GOAL SCORERS")
print("-" * 50)

goal_leaders = season_stats[season_stats['season_goals'] > 0].nlargest(15, 'season_goals')

goal_scorers_data = []
for i, (_, player) in enumerate(goal_leaders.iterrows(), 1):
    gpg = player['season_goals'] / player['games_played'] if player['games_played'] > 0 else 0
    
    player_data = {
        "player": player['web_name'],
        "team": player['team_name'],
        "team_short": player['team_name_short'],
        "goals": int(player['season_goals']),
        "goalsPerGame": round(gpg, 2),
        "xG": round(player['season_xG'], 1),
        "points": int(player['season_points']),
        "price": player['now_cost'],
        "ownership": player['selected_by_percent'],
        "form": player['form']
    }
    goal_scorers_data.append(player_data)
    
    print(f"{i}. {player['web_name']} ({player['team_name']} [{player['team_name_short']}])")
    print(f"   {player['season_goals']:.0f} goals ({gpg:.2f} per game) | xG: {player['season_xG']:.1f} | {player['season_points']:.0f} pts | ¬£{player['now_cost']}m | Form: {player['form']}")

insights_data['goal_scorers'] = goal_scorers_data

# 5. ASSIST LEADERS - Top assist providers
print(f"\nüéØ TOP ASSIST PROVIDERS")
print("-" * 50)

assist_leaders = season_stats[season_stats['season_assists'] > 0].nlargest(12, 'season_assists')

assist_providers_data = []
for i, (_, player) in enumerate(assist_leaders.iterrows(), 1):
    apg = player['season_assists'] / player['games_played'] if player['games_played'] > 0 else 0
    
    player_data = {
        "player": player['web_name'],
        "team": player['team_name'],
        "team_short": player['team_name_short'],
        "assists": int(player['season_assists']),
        "assistsPerGame": round(apg, 2),
        "points": int(player['season_points']),
        "price": player['now_cost'],
        "ownership": player['selected_by_percent'],
        "form": player['form']
    }
    assist_providers_data.append(player_data)
    
    print(f"{i}. {player['web_name']} ({player['team_name']} [{player['team_name_short']}])")
    print(f"   {player['season_assists']:.0f} assists ({apg:.2f} per game) | {player['season_points']:.0f} pts | ¬£{player['now_cost']}m | Form: {player['form']}")

insights_data['assist_providers'] = assist_providers_data

# 6. DEFENSIVE LEADERS - Best defenders/goalkeepers
print(f"\nüõ°Ô∏è DEFENSIVE LEADERS")
print("-" * 50)

defensive_candidates = season_stats[
    (season_stats['season_points'] >= 10) &
    (season_stats['games_played'] >= 3) &
    (season_stats['position_name'].isin(['Goalkeeper', 'Defender']))
].copy()

defensive_leaders_data = []
if len(defensive_candidates) > 0:
    defensive_candidates['defensive_score'] = (
        defensive_candidates['season_CS'] * 0.20 +  # Clean Sheets should be more heavily weighted
        defensive_candidates['season_tackles'] * 0.15 +  # Tackles are key for defensive performance
        defensive_candidates['season_recoveries'] * 0.15 +  # Recoveries are a crucial defensive stat
        defensive_candidates['season_xCS'] * 0.20 +  # xCS is predictive of future clean sheets, so it's very important
        defensive_candidates['defensive_contribution_sum'] * 0.10 +  # Overall defensive contributions
        defensive_candidates['clearances_blocks_interceptions_sum'] * 0.10 +  # Key to defensive stability
        (defensive_candidates['season_points'] / defensive_candidates['games_played']) * 0.10  # Points still matter, but not as much as the core defensive metrics
    )
    
    top_defenders = defensive_candidates.nlargest(10, 'defensive_score')
    print("üõ°Ô∏è Best Defensive Performers:")
    for i, (_, player) in enumerate(top_defenders.iterrows(), 1):
        cs_rate = (player['season_CS'] / player['games_played']) * 100 if player['games_played'] > 0 else 0
        ppg = player['season_points'] / player['games_played'] if player['games_played'] > 0 else 0
        
        player_data = {
            "player": player['web_name'],
            "team": player['team_name'],
            "team_short": player['team_name_short'],
            "position": player['position_name'],
            "points": int(player['season_points']),
            "ppg": round(ppg, 1),
            "cleanSheets": int(player['season_CS']),
            "csRate": round(cs_rate, 1),
            "tackles": int(player['season_tackles']) if player['season_tackles'] > 0 else 1,
            "defensiveContributions": int(player['defensive_contribution_sum']) if 'defensive_contribution_sum' in player and player['defensive_contribution_sum'] > 0 else 1,
            "price": player['now_cost'],
            "form": player['form']
        }
        defensive_leaders_data.append(player_data)
        
        print(f"  {i}. {player['web_name']} ({player['team_name']} [{player['team_name_short']}], {player['position_name']})")
        print(f"     {player['season_points']:.0f} pts ({ppg:.1f} ppg) | {player['season_CS']:.0f} CS ({cs_rate:.1f}%) | {player['season_tackles']:.0f} tackles | ¬£{player['now_cost']}m | Form: {player['form']}")

insights_data['defensive_leaders'] = defensive_leaders_data




# Export to JSON files
import json
import os

# Create output directory
output_dir = 'backend/data/top_performers'
os.makedirs(output_dir, exist_ok=True)

# Export each category to separate JSON files
for category, data in insights_data.items():
    filename = f'{output_dir}/{category}.json'
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"\n‚úÖ Exported {category}: {len(data)} players -> {filename}")

# Also create a combined file for convenience
combined_filename = f'{output_dir}/all_insights.json'
with open(combined_filename, 'w', encoding='utf-8') as f:
    json.dump(insights_data, f, indent=2, ensure_ascii=False)

print(f"\nüéâ ALL DATA EXPORTED TO JSON!")
print(f"üìÅ Output directory: {output_dir}/")
print(f"üìä Individual files: {list(insights_data.keys())}")
print(f"üì¶ Combined file: all_insights.json")

In [None]:
# 7. PERFORMANCE ANALYSIS
print(f"\nüìà OVERPERFORMANCE ANALYSIS")
print("-" * 50)

overperformers_data = []
sustainable_scorers_data = []
underperformers_data = []

if 'season_xG' in season_stats.columns and 'season_goals' in season_stats.columns:
    overperformance_candidates = season_stats[
        (season_stats['season_goals'] > 0) & 
        (season_stats['season_xG'] > 0) &
        (season_stats['games_played'] >= 3)
    ].copy()
    
    # Calculate overperformance and normalize by minutes played
    overperformance_candidates['goal_overperformance'] = overperformance_candidates['season_goals'] - overperformance_candidates['season_xG']
    if 'minutes_played' in season_stats.columns:
        overperformance_candidates['overperformance_per_90'] = overperformance_candidates['goal_overperformance'] / overperformance_candidates['minutes_played'] * 90
    else:
        overperformance_candidates['overperformance_per_90'] = overperformance_candidates['goal_overperformance'] / overperformance_candidates['games_played']

    # Dynamic threshold based on xG
    overperformance_candidates['threshold'] = 0.1 * overperformance_candidates['season_xG'].clip(lower=0.5)  # Minimum threshold of 0.5

    # Goal overperformers (regression risk)
    goal_overperformers = overperformance_candidates[
        overperformance_candidates['goal_overperformance'] > overperformance_candidates['threshold']
    ].nlargest(8, 'overperformance_per_90')
    
    print("‚ö° Top Goal Overperformers (Potential Regression Risk):")
    for i, (_, player) in enumerate(goal_overperformers.iterrows(), 1):
        player_data = {
            "player": player['web_name'],
            "team": player['team_name'],
            "team_short": player['team_name_short'],
            "goals": int(player['season_goals']),
            "xG": round(player['season_xG'], 1),
            "overperformance": round(player['goal_overperformance'], 1),
            "overperformance_per_90": round(player['overperformance_per_90'], 3),
            "sustainable": False,
            "form": player['form']
        }
        overperformers_data.append(player_data)
        shots_info = f" | Shots: {player['shots']:.0f}, SoT: {player['shots_on_target']:.0f}" if 'shots' in player and 'shots_on_target' in player else ""
        print(f"  {i}. {player['web_name']} ({player['team_name']} [{player['team_name_short']}]): {player['season_goals']:.0f} goals vs {player['season_xG']:.1f} xG (+{player['goal_overperformance']:.1f}) | Per 90: {player['overperformance_per_90']:.3f} | Form: {player['form']}{shots_info}")

    # Sustainable scorers (goals close to xG)
    sustainable_scorers = overperformance_candidates[
        abs(overperformance_candidates['goal_overperformance']) <= overperformance_candidates['threshold']
    ].nlargest(8, 'season_goals')  # Sort by goals for relevance
    print("\nüåü Sustainable Scorers (Consistent Performance):")
    for i, (_, player) in enumerate(sustainable_scorers.iterrows(), 1):
        player_data = {
            "player": player['web_name'],
            "team": player['team_name'],
            "team_short": player['team_name_short'],
            "goals": int(player['season_goals']),
            "xG": round(player['season_xG'], 1),
            "overperformance": round(player['goal_overperformance'], 1),
            "overperformance_per_90": round(player['overperformance_per_90'], 3),
            "sustainable": True,
            "form": player['form']
        }
        sustainable_scorers_data.append(player_data)
        shots_info = f" | Shots: {player['shots']:.0f}, SoT: {player['shots_on_target']:.0f}" if 'shots' in player and 'shots_on_target' in player else ""
        print(f"  {i}. {player['web_name']} ({player['team_name']} [{player['team_name_short']}]): {player['season_goals']:.0f} goals vs {player['season_xG']:.1f} xG ({player['goal_overperformance']:.1f}) | Per 90: {player['overperformance_per_90']:.3f} | Form: {player['form']}{shots_info}")

    # Underperformers (potential breakout candidates)
    goal_underperformers = overperformance_candidates[
        overperformance_candidates['goal_overperformance'] < -overperformance_candidates['threshold']
    ].nlargest(8, 'season_xG')  # Sort by xG for breakout potential
    print("\nüî• Goal Underperformers (Potential Breakout Candidates):")
    for i, (_, player) in enumerate(goal_underperformers.iterrows(), 1):
        player_data = {
            "player": player['web_name'],
            "team": player['team_name'],
            "team_short": player['team_name_short'],
            "goals": int(player['season_goals']),
            "xG": round(player['season_xG'], 1),
            "overperformance": round(player['goal_overperformance'], 1),
            "overperformance_per_90": round(player['overperformance_per_90'], 3),
            "sustainable": False,
            "form": player['form']
        }
        underperformers_data.append(player_data)
        shots_info = f" | Shots: {player['shots']:.0f}, SoT: {player['shots_on_target']:.0f}" if 'shots' in player and 'shots_on_target' in player else ""
        print(f"  {i}. {player['web_name']} ({player['team_name']} [{player['team_name_short']}]): {player['season_goals']:.0f} goals vs {player['season_xG']:.1f} xG ({player['goal_overperformance']:.1f}) | Per 90: {player['overperformance_per_90']:.3f} | Form: {player['form']}{shots_info}")

    # Export to JSON
    os.makedirs('backend/data/performance_analysis', exist_ok=True)
    with open('backend/data/performance_analysis/overperformers.json', 'w', encoding='utf-8') as f:
        json.dump(overperformers_data, f, indent=4, ensure_ascii=False) 
    with open('backend/data/performance_analysis/sustainable_scorers.json', 'w', encoding='utf-8') as f:
        json.dump(sustainable_scorers_data, f, indent=4 , ensure_ascii=False)
    with open('backend/data/performance_analysis/underperformers.json', 'w', encoding='utf-8') as f:
        json.dump(underperformers_data, f, indent=4 , ensure_ascii=False)
    print("\nExported performance data to backend/data/performance_analysis/")
else:
    print("‚ùå Missing required columns (season_xG or season_goals) for overperformance analysis")

# 7Ô∏è‚É£ Team Rankings

In [None]:
season_stats

In [None]:
import json

# üèÜ ENHANCED TEAM STRENGTH RANKINGS WITH FORM WEIGHTING + SOS ADJUSTMENT
print("="*70)
print("üìä COMPREHENSIVE TEAM STRENGTH RANKINGS")
print("="*70)
print("üí° Enhanced with recent form weighting (80% season avg, 20% last 5 GWs)")
print("üí° Includes all available defensive metrics for accurate fixture assessment")
print("üéØ NEW: Strength of Schedule (SOS) adjustment for opponent quality")

def create_comprehensive_team_strength_rankings(season_data: pd.DataFrame, raw_df: pd.DataFrame = None) -> pd.DataFrame:
    """
    Create comprehensive team strength rankings using all available defensive metrics.
    Enhanced calculation includes tackles, recoveries, clearances, and expected stats.
    
    ENHANCED FEATURES:
    - Form Weighting: Recent 5 gameweeks weighted 60%, season average 40%
    - SOS Adjustment: Stats adjusted based on opponent strength (prevents schedule luck)
    """
    
    # üìà CALCULATE RECENT FORM WITH STRENGTH OF SCHEDULE (SOS) ADJUSTMENT (Last 5 gameweeks)
    recent_form_stats = None
    if raw_df is not None:
        try:
            # Get the last gameweek number
            max_gw = raw_df['gameweek'].max()
            recent_gw_start = max(1, max_gw - 4)  # Last 5 gameweeks
            
            # Filter for recent gameweeks only
            recent_df = raw_df[raw_df['gameweek'] >= recent_gw_start].copy()
            
            # üéØ FIRST PASS: Calculate initial rankings (without SOS adjustment) for opponent quality assessment
            temp_attack = recent_df.groupby('team_name').agg({
                'G': 'sum', 'xG': 'sum', 'A': 'sum', 'xA': 'sum', 
                'shots': 'sum', 'key_passes': 'sum', 'gameweek': 'count'
            }).rename(columns={'gameweek': 'temp_games'})
            
            temp_defense = recent_df[recent_df['element_type'].isin([1, 2])].groupby('team_name').agg({
                'CS': 'sum', 'GC': 'sum', 'gameweek': 'nunique'
            }).rename(columns={'gameweek': 'temp_games'})
            
            # Calculate temporary strength scores
            temp_attack['temp_attack_strength'] = (
                (temp_attack['xG'] / temp_attack['temp_games']) * 0.4 +
                (temp_attack['G'] / temp_attack['temp_games']) * 0.3 +
                (temp_attack['shots'] / temp_attack['temp_games']) * 0.3
            )
            
            temp_defense['temp_defense_strength'] = (
                (temp_defense['CS'] / temp_defense['temp_games']) * 0.6 +
                (1 / ((temp_defense['GC'] / temp_defense['temp_games']) + 0.1)) * 0.4
            )
            
            # Create temporary rank mappings
            temp_attack_ranks = temp_attack['temp_attack_strength'].rank(ascending=False, method='dense').astype(int)
            temp_defense_ranks = temp_defense['temp_defense_strength'].rank(ascending=False, method='dense').astype(int)
            total_teams = max(len(temp_attack_ranks), len(temp_defense_ranks))
            
            # üéØ SOS ADJUSTMENT FUNCTION
            def get_sos_factor(opponent_rank, total_teams, is_attack=True):
                """
                Calculate Strength of Schedule adjustment factor based on opponent rank.
                
                For ATTACK: Facing weak defense (#20) ‚Üí easier ‚Üí factor < 1 (reduces stat)
                            Facing strong defense (#1) ‚Üí harder ‚Üí factor > 1 (boosts stat)
                            
                For DEFENSE: Facing weak attack (#20) ‚Üí easier ‚Üí factor < 1 (reduces stat)
                             Facing strong attack (#1) ‚Üí harder ‚Üí factor > 1 (boosts stat)
                """
                if pd.isna(opponent_rank) or total_teams == 0:
                    return 1.0  # No adjustment if opponent unknown
                
                # Normalize rank to 0-1 scale (0 = best, 1 = worst)
                normalized_rank = (opponent_rank - 1) / (total_teams - 1) if total_teams > 1 else 0.5
                
                # Convert to adjustment factor (0.7 to 1.3 range for balanced adjustment)
                # Weak opponent (rank 20) ‚Üí 0.7 (reduces inflated stats)
                # Strong opponent (rank 1) ‚Üí 1.3 (boosts suppressed stats)
                factor = 0.7 + (0.6 * (1 - normalized_rank))
                
                return factor
            
            # üéØ SECOND PASS: Apply SOS adjustment to each team's stats
            sos_adjusted_attack = {}
            sos_adjusted_defense = {}
            
            for team in recent_df['team_name'].unique():
                team_games = recent_df[recent_df['team_name'] == team].copy()
                
                # ATTACKING SOS ADJUSTMENT
                adjusted_attack_stats = {
                    'G': 0.0, 'xG': 0.0, 'A': 0.0, 'xA': 0.0,
                    'shots': 0.0, 'key_passes': 0.0, 'recent_games': 0
                }
                
                for gw, gw_data in team_games.groupby('gameweek'):
                    opponent = gw_data['opponent_team_name'].iloc[0]
                    opponent_def_rank = temp_defense_ranks.get(opponent, total_teams // 2)  # Default to mid-table
                    sos_factor = get_sos_factor(opponent_def_rank, total_teams, is_attack=True)
                    
                    # Apply SOS adjustment to game stats
                    adjusted_attack_stats['G'] += gw_data['G'].sum() * sos_factor
                    adjusted_attack_stats['xG'] += gw_data['xG'].sum() * sos_factor
                    adjusted_attack_stats['A'] += gw_data['A'].sum() * sos_factor
                    adjusted_attack_stats['xA'] += gw_data['xA'].sum() * sos_factor
                    adjusted_attack_stats['shots'] += gw_data['shots'].sum() * sos_factor
                    adjusted_attack_stats['key_passes'] += gw_data['key_passes'].sum() * sos_factor
                    adjusted_attack_stats['recent_games'] += 1
                
                sos_adjusted_attack[team] = adjusted_attack_stats
                
                # DEFENSIVE SOS ADJUSTMENT (only for GK/DEF)
                team_defenders = team_games[team_games['element_type'].isin([1, 2])]
                
                if len(team_defenders) > 0:
                    adjusted_defense_stats = {
                        'CS': 0.0, 'xCS': 0.0, 'GC': 0.0, 'xGC': 0.0,
                        'tackles': 0.0, 'recoveries': 0.0,
                        'clearances_blocks_interceptions': 0.0,
                        'defensive_contribution': 0.0, 'recent_games': 0
                    }
                    
                    for gw, gw_data in team_defenders.groupby('gameweek'):
                        opponent = gw_data['opponent_team_name'].iloc[0]
                        opponent_att_rank = temp_attack_ranks.get(opponent, total_teams // 2)
                        sos_factor = get_sos_factor(opponent_att_rank, total_teams, is_attack=False)
                        
                        # Apply SOS adjustment to defensive stats
                        # For GC/xGC: Use MAX per game (team-level stats)
                        adjusted_defense_stats['CS'] += gw_data['CS'].max() * sos_factor
                        adjusted_defense_stats['xCS'] += gw_data['xCS'].max() * sos_factor
                        adjusted_defense_stats['GC'] += gw_data['GC'].max() * sos_factor
                        adjusted_defense_stats['xGC'] += gw_data['xGC'].max() * sos_factor
                        
                        # For tackles/recoveries: Use SUM per game
                        adjusted_defense_stats['tackles'] += gw_data['tackles'].sum() * sos_factor
                        adjusted_defense_stats['recoveries'] += gw_data['recoveries'].sum() * sos_factor
                        adjusted_defense_stats['clearances_blocks_interceptions'] += gw_data['clearances_blocks_interceptions'].sum() * sos_factor
                        adjusted_defense_stats['defensive_contribution'] += gw_data['defensive_contribution'].sum() * sos_factor
                        adjusted_defense_stats['recent_games'] += 1
                    
                    sos_adjusted_defense[team] = adjusted_defense_stats
            
            # Convert to DataFrames
            recent_form_stats = {
                'attack': pd.DataFrame(sos_adjusted_attack).T,
                'defense': pd.DataFrame(sos_adjusted_defense).T if sos_adjusted_defense else pd.DataFrame()
            }
            
            print(f"‚úÖ SOS-Adjusted form calculated from GW {recent_gw_start} to {max_gw} ({max_gw - recent_gw_start + 1} gameweeks)")
            print(f"   üìä Adjustment accounts for opponent quality (easier opponents = reduced impact)")
        except Exception as e:
            print(f"‚ö†Ô∏è Could not calculate SOS-adjusted form: {e}")
            import traceback
            traceback.print_exc()
            recent_form_stats = None
    
    # ‚öΩ ATTACKING STRENGTH CALCULATION (Season averages)
    attacking_stats = season_data.groupby('team_name').agg({
        'season_goals': 'sum',
        'season_xG': 'sum', 
        'season_assists': 'sum',
        'season_xA': 'sum',
        'season_shots': 'sum',
        'season_SoT': 'sum',
        'season_key_passes': 'sum',
        'games_played': 'mean'
    }).round(3)
    
    # üõ°Ô∏è COMPREHENSIVE DEFENSIVE STRENGTH CALCULATION
    # Include all defensive players (GK + DEF)
    defensive_players = season_data[season_data['element_type'].isin([1, 2])]
    
    if len(defensive_players) == 0:
        print("‚ö†Ô∏è Warning: No defensive players found in dataset")
        defensive_stats = pd.DataFrame(index=attacking_stats.index)
        # Set default values for missing defensive data
        default_values = {
            'season_CS': 3.0, 'season_xCS': 3.0, 'season_GC': 1.5, 'season_xGC': 1.5,
            'season_tackles': 15.0, 'season_recoveries': 20.0, 
            'season_CBI': 10.0, 'season_defensive_contribution': 5.0,
            'games_played': attacking_stats['games_played'].iloc[0] if len(attacking_stats) > 0 else 6
        }
        for col, val in default_values.items():
            defensive_stats[col] = val
    else:
        # Aggregate all available defensive metrics
        agg_dict = {
            'games_played': 'mean'
        }
        
        # üîß FIX: CS/GC are TEAM-LEVEL stats (all defenders have same value)
        # Use 'max' to get the single team value, not 'mean'
        team_level_stats = ['season_CS', 'season_xCS', 'season_GC', 'season_xGC']
        player_level_stats = ['season_tackles', 'season_recoveries', 
                              'season_clearances_blocks_interceptions', 'season_defensive_contribution']
        
        # Team-level: Use max (all defenders have same value)
        for col in team_level_stats:
            if col in defensive_players.columns:
                agg_dict[col] = 'max'
        
        # Player-level: Use mean (average across defenders)
        for col in player_level_stats:
            if col in defensive_players.columns:
                agg_dict[col] = 'mean'
        
        defensive_stats = defensive_players.groupby('team_name').agg(agg_dict).round(3)
        
        # Rename long column name for easier handling
        if 'season_clearances_blocks_interceptions' in defensive_stats.columns:
            defensive_stats.rename(columns={'season_clearances_blocks_interceptions': 'season_CBI'}, inplace=True)
    
    # üìä CALCULATE PER-GAME METRICS
    
    # Attacking per-game metrics
    attacking_stats['goals_pg'] = attacking_stats['season_goals'] / attacking_stats['games_played']
    attacking_stats['xG_pg'] = attacking_stats['season_xG'] / attacking_stats['games_played']
    attacking_stats['assists_pg'] = attacking_stats['season_assists'] / attacking_stats['games_played']
    attacking_stats['xA_pg'] = attacking_stats['season_xA'] / attacking_stats['games_played']
    attacking_stats['shots_pg'] = attacking_stats['season_shots'] / attacking_stats['games_played']
    attacking_stats['key_passes_pg'] = attacking_stats['season_key_passes'] / attacking_stats['games_played']
    
    # Defensive per-game metrics
    defensive_stats['CS_rate'] = defensive_stats['season_CS'] / defensive_stats['games_played']
    defensive_stats['xCS_rate'] = defensive_stats['season_xCS'] / defensive_stats['games_played']
    defensive_stats['GC_pg'] = defensive_stats['season_GC'] / defensive_stats['games_played']
    defensive_stats['xGC_pg'] = defensive_stats['season_xGC'] / defensive_stats['games_played']
 
    
    if 'season_tackles' in defensive_stats.columns:
        defensive_stats['tackles_pg'] = defensive_stats['season_tackles'] / defensive_stats['games_played']
    if 'season_recoveries' in defensive_stats.columns:
        defensive_stats['recoveries_pg'] = defensive_stats['season_recoveries'] / defensive_stats['games_played']
    if 'season_CBI' in defensive_stats.columns:
        defensive_stats['CBI_pg'] = defensive_stats['season_CBI'] / defensive_stats['games_played']
    if 'season_defensive_contribution' in defensive_stats.columns:
        defensive_stats['def_contrib_pg'] = defensive_stats['season_defensive_contribution'] / defensive_stats['games_played']
    
    # üéØ ENHANCED STRENGTH CALCULATIONS (Season baseline)
    
    # Attack Strength (weighted combination of multiple metrics)
    # üéØ XG-FOCUSED: Predictive for FPL value, not reactive to lucky streaks
    attacking_stats['attack_strength'] = (
        attacking_stats['xG_pg'] * 0.20 +           # Expected goals (predictive)
        attacking_stats['goals_pg'] * 0.30 +        # Actual goals (results)
        attacking_stats['xA_pg'] * 0.15 +           # Expected assists (creativity)
        attacking_stats['assists_pg'] * 0.15 +      # Actual assists
        attacking_stats['shots_pg'] * 0.10 +        # Shot volume
        attacking_stats['key_passes_pg'] * 0.10     # Key passes (creativity)
    )
    
    # Comprehensive Defense Strength (using all available metrics)
    defense_components = []
    weights = []
    
    # üéØ FPL-FOCUSED WEIGHTS: Prioritize point-earning stats!
    # CS = 4pts for defenders, GC prevents CS
    # Def Contrib affects BPS (bonus points)
    
    # ACTUAL RESULTS: CS + GC (40% total)
    defense_components.append(defensive_stats['CS_rate'])
    weights.append(0.25)  # Clean Sheets (direct 4 FPL pts)
    
    defense_components.append(1 / (defensive_stats['GC_pg'] + 0.1))
    weights.append(0.15)  # Goals Conceded (prevents CS)
    
    # EXPECTED METRICS: 30% weight (predictive)
    if 'xCS_rate' in defensive_stats.columns:
        defense_components.append(defensive_stats['xCS_rate'])
        weights.append(0.20)  # Expected CS
    
    if 'xGC_pg' in defensive_stats.columns:
        defense_components.append(1 / (defensive_stats['xGC_pg'] + 0.1))
        weights.append(0.10)  # Expected GC
    
    # DEFENSIVE ACTIONS: 30% weight (BPS contributors)
    if 'def_contrib_pg' in defensive_stats.columns:
        def_contrib_norm = defensive_stats['def_contrib_pg'] / defensive_stats['def_contrib_pg'].max() if defensive_stats['def_contrib_pg'].max() > 0 else defensive_stats['def_contrib_pg']
        defense_components.append(def_contrib_norm)
        weights.append(0.20)  # Defensive contribution (BPS, bonus pts)
    
    if 'CBI_pg' in defensive_stats.columns:
        cbi_norm = defensive_stats['CBI_pg'] / defensive_stats['CBI_pg'].max() if defensive_stats['CBI_pg'].max() > 0 else defensive_stats['CBI_pg']
        defense_components.append(cbi_norm)
        weights.append(0.05)  # Clearances/blocks/interceptions
    
    if 'tackles_pg' in defensive_stats.columns:
        tackles_norm = defensive_stats['tackles_pg'] / defensive_stats['tackles_pg'].max() if defensive_stats['tackles_pg'].max() > 0 else defensive_stats['tackles_pg']
        defense_components.append(tackles_norm)
        weights.append(0.05)  # Tackles
    
    # Normalize weights to sum to 1
    total_weight = sum(weights)
    weights = [w/total_weight for w in weights]
    
    # Calculate weighted defensive strength
    defensive_stats['defense_strength'] = sum(comp * weight for comp, weight in zip(defense_components, weights))
    
    # üî• BLEND RECENT FORM WITH SEASON AVERAGES (40% season, 60% recent)
    if recent_form_stats is not None:
        print("\nüîÑ Blending recent form (60%) with season averages (40%)...")
        
        # Blend attacking strength
        for team in attacking_stats.index:
            if team in recent_form_stats['attack'].index:
                recent_attack = recent_form_stats['attack'].loc[team]
                recent_games = recent_attack['recent_games']
                
                if recent_games > 0:
                    # Calculate recent form strength using same weights
                    recent_attack_strength = (
                        (recent_attack['xG'] / recent_games) * 0.20 +
                        (recent_attack['G'] / recent_games) * 0.30 +
                        (recent_attack['xA'] / recent_games) * 0.15 +
                        (recent_attack['A'] / recent_games) * 0.15 +
                        (recent_attack['shots'] / recent_games) * 0.10 +
                        (recent_attack['key_passes'] / recent_games) * 0.10
                    )
                    
                    # Blend: 60% recent form, 40% season average (prioritize current form)
                    season_strength = attacking_stats.loc[team, 'attack_strength']
                    attacking_stats.loc[team, 'attack_strength'] = (
                        recent_attack_strength * 0.60 + season_strength * 0.40
                    )
        
        # Blend defensive strength
        for team in defensive_stats.index:
            if team in recent_form_stats['defense'].index:
                recent_defense = recent_form_stats['defense'].loc[team]
                recent_games = recent_defense['recent_games']
                
                if recent_games > 0:
                    # Calculate recent form defensive strength
                    recent_defense_components = []
                    recent_weights = []
                    
                    # ACTUAL RESULTS: 40% weight (CS + GC)
                    recent_defense_components.append(recent_defense['CS'] / recent_games)
                    recent_weights.append(0.25)  # Clean sheets
                    
                    recent_defense_components.append(1 / (recent_defense['GC'] / recent_games + 0.1))
                    recent_weights.append(0.15)  # Goals conceded
                    
                    # EXPECTED: 30% weight (predictive)
                    if 'xCS' in recent_defense.index:
                        recent_defense_components.append(recent_defense['xCS'] / recent_games)
                        recent_weights.append(0.20)
                    
                    if 'xGC' in recent_defense.index:
                        recent_defense_components.append(1 / (recent_defense['xGC'] / recent_games + 0.1))
                        recent_weights.append(0.10)
                    
                    # DEFENSIVE ACTIONS: 30% weight (BPS)
                    recent_def_df = recent_form_stats['defense']
                    if 'defensive_contribution' in recent_defense.index and recent_def_df['defensive_contribution'].max() > 0:
                        recent_defense_components.append(recent_defense['defensive_contribution'] / recent_def_df['defensive_contribution'].max())
                        recent_weights.append(0.20)  # Def contrib
                    
                    if 'clearances_blocks_interceptions' in recent_defense.index and recent_def_df['clearances_blocks_interceptions'].max() > 0:
                        recent_defense_components.append(recent_defense['clearances_blocks_interceptions'] / recent_def_df['clearances_blocks_interceptions'].max())
                        recent_weights.append(0.05)  # CBI
                    
                    if 'tackles' in recent_defense.index and recent_def_df['tackles'].max() > 0:
                        recent_defense_components.append(recent_defense['tackles'] / recent_def_df['tackles'].max())
                        recent_weights.append(0.05)  # Tackles
                    
                    # Normalize weights
                    total_recent_weight = sum(recent_weights)
                    recent_weights = [w/total_recent_weight for w in recent_weights]
                    
                    recent_defense_strength = sum(comp * weight for comp, weight in zip(recent_defense_components, recent_weights))
                    
                    # Blend: 60% recent form, 40% season average (prioritize current form)
                    season_strength = defensive_stats.loc[team, 'defense_strength']
                    defensive_stats.loc[team, 'defense_strength'] = (
                        recent_defense_strength * 0.60 + season_strength * 0.40
                    )
        
        print("‚úÖ Form blending complete - Rankings now reflect recent performance!")
    else:
        print("‚ÑπÔ∏è Using season-long averages only (no recent form data)")
    
    # üèÜ COMBINE TEAM RANKINGS
    team_rankings = attacking_stats[['attack_strength']].join(
        defensive_stats[['defense_strength']], how='outer'
    )
    
    # üîß FIXED: Handle missing data (pandas 3.0 compatible)
    team_rankings = team_rankings.fillna({
        'attack_strength': team_rankings['attack_strength'].median(),
        'defense_strength': team_rankings['defense_strength'].median()
    })
    
    # Overall strength calculation
    team_rankings['overall_strength'] = (
        team_rankings['attack_strength'] * 0.6 + 
        team_rankings['defense_strength'] * 0.4
    )
    
    # Generate rankings
    team_rankings['attack_rank'] = team_rankings['attack_strength'].rank(ascending=False, method='dense').astype(int)
    team_rankings['defense_rank'] = team_rankings['defense_strength'].rank(ascending=False, method='dense').astype(int)
    team_rankings['overall_rank'] = team_rankings['overall_strength'].rank(ascending=False, method='dense').astype(int)
    
    return team_rankings.round(3)

# Generate comprehensive team rankings WITH FORM WEIGHTING
# Pass the raw df to enable form calculation
team_rankings = create_comprehensive_team_strength_rankings(season_stats, raw_df=df)
team_rankings_sorted = team_rankings.sort_values('overall_rank')

print("\nüèÜ COMPREHENSIVE TEAM STRENGTH RANKINGS")
print("=" * 65)
print("üìã All Teams Ranked (Enhanced with Recent Form + Defensive Analysis):")
print(team_rankings_sorted[['overall_rank', 'attack_rank', 'defense_rank', 
                           'overall_strength', 'attack_strength', 'defense_strength']].to_string())

print(f"\n‚öΩ TOP ATTACKING TEAMS:")
attack_rankings = team_rankings.sort_values('attack_rank').head(20)
for idx, (team, data) in enumerate(attack_rankings.iterrows(), 1):
    team_short = season_stats[season_stats['team_name'] == team]['team_name_short'].iloc[0] if not season_stats[season_stats['team_name'] == team].empty else 'UNK'
    print(f" {int(data['attack_rank']):2d}. {team:<15} [{team_short}] (Attack: {data['attack_strength']:.3f})")

print(f"\nüõ°Ô∏è TOP DEFENSIVE TEAMS:")
defense_rankings = team_rankings.sort_values('defense_rank').head(20)
for idx, (team, data) in enumerate(defense_rankings.iterrows(), 1):
    print(f" {int(data['defense_rank']):2d}. {team:<15} [{team_short}] (Defense: {data['defense_strength']:.3f})")

In [None]:
# Export team rankings to JSON files
print("\n" + "="*70)
print("üì§ EXPORTING TEAM RANKINGS TO JSON")
print("="*70)

os.makedirs('backend/data/rankings', exist_ok=True)

# Prepare rankings data with additional stats from season_stats
attack_rankings_data = []
defense_rankings_data = []
overall_rankings_data = []

for team, ranks in team_rankings.sort_values('attack_rank').iterrows():
    # Get team stats
    team_data = season_stats[season_stats['team_name'] == team]
    if not team_data.empty:
        team_short = team_data['team_name_short'].iloc[0] if 'team_name_short' in team_data.columns else team[:3].upper()
    else:
        team_short = team[:3].upper()
    
    # Get aggregated stats if available
    gpg = season_stats[season_stats['team_name'] == team]['season_goals'].sum() / season_stats[season_stats['team_name'] == team]['games_played'].max() if not team_data.empty else 0
    expected_gpg = season_stats[season_stats['team_name'] == team]['season_xG'].sum() / season_stats[season_stats['team_name'] == team]['games_played'].max() if not team_data.empty else 0
    
    # Get defensive stats (only from defenders and goalkeepers)
    def_players = season_stats[(season_stats['team_name'] == team) & (season_stats['position_name'].isin(['Defender', 'Goalkeeper']))]
    # üîß FIX: Use max() not mean() - all defenders have same team-level CS/GC values
    gcpg = def_players['season_GC'].max() if not def_players.empty and 'season_GC' in def_players.columns else 0
    cs_rate = def_players['season_CS'].max() / def_players['games_played'].max() if not def_players.empty and 'season_CS' in def_players.columns else 0
    
    # Defensive contribution (tackles, recoveries, etc.)
    def_contrib = 0
    if not def_players.empty:
        if 'season_tackles' in def_players.columns:
            def_contrib += def_players['season_tackles'].mean()
    
    attack_rankings_data.append({
        'team': team,
        'team_short': team_short,
        'attack_rank': int(ranks['attack_rank']),
        'overall_rank': int(ranks['overall_rank']),
        'overall_strength': round(ranks['overall_strength'], 3),
        'attack_strength': round(ranks['attack_strength'], 3),
        'goals_per_game': round(gpg, 2),
        'expected_goals_per_game': round(expected_gpg, 2),
        'goals_conceded_per_game': round(gcpg, 2),
        'clean_sheet_rate': round(cs_rate, 2),
        'defensive_contribution': round(def_contrib, 0)
    })

for team, ranks in team_rankings.sort_values('defense_rank').iterrows():
    # Get team stats
    team_data = season_stats[season_stats['team_name'] == team]
    if not team_data.empty:
        team_short = team_data['team_name_short'].iloc[0] if 'team_name_short' in team_data.columns else team[:3].upper()
    else:
        team_short = team[:3].upper()
    
    # Get aggregated stats
    gpg = season_stats[season_stats['team_name'] == team]['season_goals'].sum() / season_stats[season_stats['team_name'] == team]['games_played'].max() if not team_data.empty else 0
    expected_gpg = season_stats[season_stats['team_name'] == team]['season_xG'].sum() / season_stats[season_stats['team_name'] == team]['games_played'].max() if not team_data.empty else 0
    
    # Get defensive stats
    def_players = season_stats[(season_stats['team_name'] == team) & (season_stats['position_name'].isin(['Defender', 'Goalkeeper']))]
    # üîß FIX: Use max() not mean() - all defenders have same team-level CS/GC values
    gcpg = def_players['season_GC'].max() if not def_players.empty and 'season_GC' in def_players.columns else 0
    cs_rate = def_players['season_CS'].max() / def_players['games_played'].max() if not def_players.empty and 'season_CS' in def_players.columns else 0
    
    # Defensive contribution
    def_contrib = 0
    if not def_players.empty:
        if 'season_tackles' in def_players.columns:
            def_contrib += def_players['season_tackles'].mean()
    
    defense_rankings_data.append({
        'team': team,
        'team_short': team_short,
        'defense_rank': int(ranks['defense_rank']),
        'overall_rank': int(ranks['overall_rank']),
        'overall_strength': round(ranks['overall_strength'], 3),
        'defense_strength': round(ranks['defense_strength'], 3),
        'goals_per_game': round(gpg, 2),
        'expected_goals_per_game': round(expected_gpg, 2),
        'goals_conceded_per_game': round(gcpg, 2),
        'clean_sheet_rate': round(cs_rate, 2),
        'defensive_contribution': round(def_contrib, 0)
    })

for team, ranks in team_rankings.sort_values('overall_rank').iterrows():
    # Get team stats
    team_data = season_stats[season_stats['team_name'] == team]
    if not team_data.empty:
        team_short = team_data['team_name_short'].iloc[0] if 'team_name_short' in team_data.columns else team[:3].upper()
    else:
        team_short = team[:3].upper()
    
    # Get aggregated stats
    gpg = season_stats[season_stats['team_name'] == team]['season_goals'].sum() / season_stats[season_stats['team_name'] == team]['games_played'].max() if not team_data.empty else 0
    expected_gpg = season_stats[season_stats['team_name'] == team]['season_xG'].sum() / season_stats[season_stats['team_name'] == team]['games_played'].max() if not team_data.empty else 0
    
    # Get defensive stats
    def_players = season_stats[(season_stats['team_name'] == team) & (season_stats['position_name'].isin(['Defender', 'Goalkeeper']))]
    gcpg = def_players['season_GC'].mean() if not def_players.empty and 'season_GC' in def_players.columns else 0
    cs_rate = def_players['season_CS'].mean() / def_players['games_played'].max() if not def_players.empty and 'season_CS' in def_players.columns else 0
    
    # Defensive contribution
    def_contrib = 0
    if not def_players.empty:
        if 'season_tackles' in def_players.columns:
            def_contrib += def_players['season_tackles'].mean()
    
    overall_rankings_data.append({
        'team': team,
        'team_short': team_short,
        'overall_rank': int(ranks['overall_rank']),
        'attack_rank': int(ranks['attack_rank']),
        'defense_rank': int(ranks['defense_rank']),
        'overall_strength': round(ranks['overall_strength'], 3),
        'attack_strength': round(ranks['attack_strength'], 3),
        'defense_strength': round(ranks['defense_strength'], 3),
        'goals_per_game': round(gpg, 2),
        'expected_goals_per_game': round(expected_gpg, 2),
        'goals_conceded_per_game': round(gcpg, 2),
        'clean_sheet_rate': round(cs_rate, 2),
        'defensive_contribution': round(def_contrib, 0)
    })

# Write to JSON files
with open('backend/data/rankings/attack_rankings.json', 'w', encoding='utf-8') as f:
    json.dump(attack_rankings_data, f, indent=2, ensure_ascii=False)

with open('backend/data/rankings/defense_rankings.json', 'w', encoding='utf-8') as f:
    json.dump(defense_rankings_data, f, indent=2, ensure_ascii=False)

with open('backend/data/rankings/overall_rankings.json', 'w', encoding='utf-8') as f:
    json.dump(overall_rankings_data, f, indent=2, ensure_ascii=False)

print(f"‚úÖ Exported {len(attack_rankings_data)} teams to attack_rankings.json")
print(f"‚úÖ Exported {len(defense_rankings_data)} teams to defense_rankings.json")
print(f"‚úÖ Exported {len(overall_rankings_data)} teams to overall_rankings.json")

In [None]:
import pandas as pd
import numpy as np

# üèüÔ∏è CALCULATE DYNAMIC HOME ADVANTAGE FROM ACTUAL DATA
print("="*70)
print("üèüÔ∏è CALCULATING DYNAMIC HOME ADVANTAGE (FIXED)")
print("="*70)

def calculate_home_away_advantage(raw_df, team_rankings):
    """
    Calculate actual home/away performance for each team from gameweek data.
    Returns home advantage factors for adjusting fixture difficulty dynamically.
    
    FIXED: Now counts unique FIXTURES (gameweeks), not player rows!
    
    Process:
    1. Separate home and away games for each team
    2. Calculate attack/defense strength for each context
    3. Compute advantage factor as (home_strength - away_strength) / away_strength
    4. Convert to rank adjustment (negative = better home performance)
    """
    
    home_away_advantage = {}
    
    for team in team_rankings.index:
        # Get all games for this team
        team_home = raw_df[(raw_df['team_name'] == team) & (raw_df['was_home'] == True)]
        team_away = raw_df[(raw_df['team_name'] == team) & (raw_df['was_home'] == False)]
        
        # üîß FIX: Count unique gameweeks (fixtures), not player rows
        home_fixtures = team_home['gameweek'].nunique()
        away_fixtures = team_away['gameweek'].nunique()
        
        if home_fixtures < 2 or away_fixtures < 2:
            # Not enough data - use moderate default
            home_away_advantage[team] = {
                'home_games': home_fixtures,
                'away_games': away_fixtures,
                'home_attack_str': 0.0,
                'away_attack_str': 0.0,
                'home_defense_str': 0.0,
                'away_defense_str': 0.0,
                'attack_advantage_factor': 0.0,  # No advantage if insufficient data
                'defense_advantage_factor': 0.0,
                'attack_rank_boost': 0.0,
                'defense_rank_boost': 0.0,
                'data_quality': 'insufficient'
            }
            continue
        
        # ‚öΩ CALCULATE HOME ATTACKING STRENGTH
        # Sum stats THEN divide by fixtures (not player rows)
        home_attack_strength = (
            (team_home['xG'].sum() / home_fixtures) * 0.25 +
            (team_home['G'].sum() / home_fixtures) * 0.20 +
            (team_home['xA'].sum() / home_fixtures) * 0.20 +
            (team_home['A'].sum() / home_fixtures) * 0.15 +
            (team_home['shots'].sum() / home_fixtures) * 0.10 +
            (team_home['key_passes'].sum() / home_fixtures) * 0.10
        )
        
        # ‚öΩ CALCULATE AWAY ATTACKING STRENGTH
        away_attack_strength = (
            (team_away['xG'].sum() / away_fixtures) * 0.25 +
            (team_away['G'].sum() / away_fixtures) * 0.20 +
            (team_away['xA'].sum() / away_fixtures) * 0.20 +
            (team_away['A'].sum() / away_fixtures) * 0.15 +
            (team_away['shots'].sum() / away_fixtures) * 0.10 +
            (team_away['key_passes'].sum() / away_fixtures) * 0.10
        )
        
        # üõ°Ô∏è CALCULATE HOME DEFENSIVE STRENGTH (for defenders/GK only)
        home_defenders = team_home[team_home['element_type'].isin([1, 2])]
        if len(home_defenders) > 0:
            # Use MAX for CS/GC per gameweek (team only gets 1 CS per game, not sum of all defenders)
            home_cs = home_defenders.groupby('gameweek')['CS'].max().sum()
            home_gc = home_defenders.groupby('gameweek')['GC'].max().sum()
            
            # üîß FIXED: Group by gameweek first to get team totals, then calculate average per fixture
            home_tackles_per_fixture = home_defenders.groupby('gameweek')['tackles'].sum().mean()
            home_recoveries_per_fixture = home_defenders.groupby('gameweek')['recoveries'].sum().mean()
            home_xCS_per_fixture = home_defenders.groupby('gameweek')['xCS'].sum().mean()
            
            home_defense_strength = (
                (home_cs / home_fixtures) * 0.35 +
                (1 / (home_gc / home_fixtures + 0.1)) * 0.30 +
                home_xCS_per_fixture * 0.20 +
                home_tackles_per_fixture * 0.10 +
                home_recoveries_per_fixture * 0.05
            )
        else:
            home_defense_strength = 0.0
        
        # üõ°Ô∏è CALCULATE AWAY DEFENSIVE STRENGTH
        away_defenders = team_away[team_away['element_type'].isin([1, 2])]
        if len(away_defenders) > 0:
            away_cs = away_defenders.groupby('gameweek')['CS'].max().sum()
            away_gc = away_defenders.groupby('gameweek')['GC'].max().sum()
            
            # üîß FIXED: Group by gameweek first to get team totals, then calculate average per fixture
            away_tackles_per_fixture = away_defenders.groupby('gameweek')['tackles'].sum().mean()
            away_recoveries_per_fixture = away_defenders.groupby('gameweek')['recoveries'].sum().mean()
            away_xCS_per_fixture = away_defenders.groupby('gameweek')['xCS'].sum().mean()
            
            away_defense_strength = (
                (away_cs / away_fixtures) * 0.35 +
                (1 / (away_gc / away_fixtures + 0.1)) * 0.30 +
                away_xCS_per_fixture * 0.20 +
                away_tackles_per_fixture * 0.10 +
                away_recoveries_per_fixture * 0.05
            )
        else:
            away_defense_strength = 0.0
        
        # üìä CALCULATE ADVANTAGE FACTORS
        # Positive = better at home, Negative = better away
        if away_attack_strength > 0:
            attack_advantage_factor = (home_attack_strength - away_attack_strength) / away_attack_strength
        else:
            attack_advantage_factor = 0.0
        
        if away_defense_strength > 0:
            defense_advantage_factor = (home_defense_strength - away_defense_strength) / away_defense_strength
        else:
            defense_advantage_factor = 0.0
        
        # üéØ CONVERT ADVANTAGE FACTOR TO RANK BOOST
        # More aggressive scaling (was 0.5, now 0.8) to better reflect home advantage
        
        total_teams = len(team_rankings)
        attack_rank_boost = attack_advantage_factor * (total_teams / 10) * 0.8
        defense_rank_boost = defense_advantage_factor * (total_teams / 10) * 0.8
        
        # üöÄ CALCULATE AWAY ADVANTAGE (for when this team plays away)
        # If team performs better away, they get boost when playing away
        # Negative advantage factor = better away performance
        away_attack_advantage_factor = -attack_advantage_factor  # Flip the sign
        away_defense_advantage_factor = -defense_advantage_factor
        
        away_attack_rank_boost = away_attack_advantage_factor * (total_teams / 10) * 0.8
        away_defense_rank_boost = away_defense_advantage_factor * (total_teams / 10) * 0.8
        
        home_away_advantage[team] = {
            'home_games': home_fixtures,
            'away_games': away_fixtures,
            'home_attack_str': round(home_attack_strength, 3),
            'away_attack_str': round(away_attack_strength, 3),
            'home_defense_str': round(home_defense_strength, 3),
            'away_defense_str': round(away_defense_strength, 3),
            'attack_advantage_factor': round(attack_advantage_factor, 3),  # % difference
            'defense_advantage_factor': round(defense_advantage_factor, 3),
            'attack_rank_boost': round(attack_rank_boost, 2),  # Rank positions for HOME games
            'defense_rank_boost': round(defense_rank_boost, 2),
            'away_attack_rank_boost': round(away_attack_rank_boost, 2),  # Rank boost for AWAY games
            'away_defense_rank_boost': round(away_defense_rank_boost, 2),
            'data_quality': 'good' if home_fixtures >= 5 and away_fixtures >= 5 else 'limited'
        }
    
    return pd.DataFrame(home_away_advantage).T

# Generate home/away advantage data
home_away_df = calculate_home_away_advantage(df, team_rankings)

# Sort by attack advantage (most impactful home teams first)
home_away_sorted = home_away_df.sort_values('attack_advantage_factor', ascending=False)

print("\nüèÜ HOME ADVANTAGE BY TEAM (FIXED)")
print("="*100)
print("\nüìä Top 10 Home Advantage Teams (Attack):")
print("Team                    | Fixtures (H/A) | Home Att | Away Att | Advantage | Rank Boost")
print("-" * 100)

for team, data in home_away_sorted.head(10).iterrows():
    print(f"{team:<23} | {int(data['home_games']):2d} / {int(data['away_games']):2d}       | "
          f"{data['home_attack_str']:7.3f}  | {data['away_attack_str']:7.3f} | "
          f"{data['attack_advantage_factor']:+7.1%} | {data['attack_rank_boost']:+5.2f} ranks")

print("\nüõ°Ô∏è Top 10 Home Advantage Teams (Defense):")
defense_sorted = home_away_df.sort_values('defense_advantage_factor', ascending=False)
print("Team                    | Fixtures (H/A) | Home Def | Away Def | Advantage | Rank Boost")
print("-" * 100)

for team, data in defense_sorted.head(10).iterrows():
    print(f"{team:<23} | {int(data['home_games']):2d} / {int(data['away_games']):2d}       | "
          f"{data['home_defense_str']:7.3f}  | {data['away_defense_str']:7.3f} | "
          f"{data['defense_advantage_factor']:+7.1%} | {data['defense_rank_boost']:+5.2f} ranks")



print("\nüìà Data Quality Summary:")
print("‚úÖ Fixed: Now uses unique fixtures instead of player rows!")

print(f"Teams with good data (5+ fixtures each): {len(home_away_df[home_away_df['data_quality'] == 'good'])}")
print("\n‚úÖ Home/away advantage data calculated and ready for fixture analysis!")

print(f"Teams with limited data: {len(home_away_df[home_away_df['data_quality'] == 'limited'])}")# Save for use in fixture analyzer

print(f"Teams with insufficient data: {len(home_away_df[home_away_df['data_quality'] == 'insufficient'])}")

In [None]:
# Initialize lists to avoid duplicates
attacking_picks = []
defensive_picks = []

def get_players_for_matchup(team, matchup_type, season_stats, team_rankings, n=4):
    team_players = season_stats[season_stats['team_name'] == team].copy()
    if team_players.empty:
        return pd.DataFrame()
    
    # Set defaults for missing columns
    default_cols = {
        'season_xG': 0.0, 'season_xGC': 0.0, 'season_CS': 0.0, 'season_xCS': 0.0,
        'season_points': 0.0, 'season_goals': 0.0, 'season_assists': 0.0,
        'season_xA': 0.0, 'season_shots': 0.0, 'season_SoT': 0.0, 'season_SiB': 0.0,
        'season_minutes': 0.0, 'now_cost': 5.0, 'selected_by_percent': 0.0, 'form': 0.0
    }
    for col, val in default_cols.items():
        if col not in team_players.columns:
            team_players[col] = val
    
    # Filter out players with insufficient minutes (less than 180 minutes = 2 full games)
    # This prevents inflated per-90 stats for rarely-used substitutes
    min_minutes_threshold = 180
    team_players = team_players[team_players['season_minutes'] >= min_minutes_threshold]
    
    if team_players.empty:
        return pd.DataFrame()
    
    # Use minutes played / 90 instead of games_played for accurate per-game metrics
    team_players['games_equivalent'] = team_players['season_minutes'] / 90
    
    # Compute metrics using games_equivalent (minutes/90)
    team_players['points_per_game'] = team_players['season_points'] / team_players['games_equivalent']
    team_players['points_per_million'] = team_players['season_points'] / team_players['now_cost'].replace(0, 1)
    
    # üîß FIXED: Calculate consistency as avg minutes per appearance (rewards full 90min starters)
    team_players['consistency_score'] = np.minimum(
        (team_players['season_minutes'] / team_players['games_played']) / 90, 
        1
    )
    
    if matchup_type == 'weak_defense':
        team_players['xg_per_game'] = team_players['season_xG'] / team_players['games_equivalent']
        team_players['xa_per_game'] = team_players['season_xA'] / team_players['games_equivalent']
        team_players['goals_per_game'] = team_players['season_goals'] / team_players['games_equivalent']
        team_players['assists_per_game'] = team_players['season_assists'] / team_players['games_equivalent']
        team_players['shots_per_game'] = team_players['season_shots'] / team_players['games_equivalent']
        
        # Add key passes if available
        if 'season_key_passes' in team_players.columns:
            team_players['key_passes_per_game'] = team_players['season_key_passes'] / team_players['games_equivalent']
        else:
            team_players['key_passes_per_game'] = 0.0
        
        team_players['SoT_per_game'] = team_players['season_SoT'] / team_players['games_equivalent']
        team_players['SiB_per_game'] = team_players['season_SiB'] / team_players['games_equivalent']
        position_filter = team_players['position_name'].isin(['Forward', 'Midfielder'])
        
        # Composite attacker score (aligned with attack ranking weights)
        # 20% xG, 30% goals, 15% xA, 15% assists, 10% shots, 10% key passes = 100% base stats
        # Then 60% base stats + 25% form + 15% consistency
        team_players['attacker_score'] = (
            0.20 * team_players['xg_per_game'] +
            0.30 * team_players['goals_per_game'] +
            0.15 * team_players['xa_per_game'] +
            0.15 * team_players['assists_per_game'] +
            0.10 * team_players['shots_per_game'] +
            0.10 * team_players['key_passes_per_game']
        ) * 0.60 + 0.25 * team_players['form'] + 0.15 * team_players['consistency_score']
        sort_columns = ['attacker_score', 'points_per_game', 'goals_per_game']
        display_cols = [
            'web_name', 'position_name', 'now_cost', 'goals_per_game', 'assists_per_game',
            'xg_per_game', 'xa_per_game', 'shots_per_game', 'key_passes_per_game', 'SoT_per_game',
            'points_per_game', 'points_per_million', 'consistency_score', 'selected_by_percent',
            'team_name_short', 'form', 'attacker_score'
        ]
    elif matchup_type == 'weak_attack':
        # Use games_played for clean sheet rate (it's a per-game stat, not per-90-minutes)
        team_players['clean_sheet_rate'] = team_players['season_CS'] / team_players['games_played']
        team_players['xcs_per_game'] = team_players['season_xCS'] / team_players['games_equivalent']
        team_players['xgc_per_game'] = team_players['season_xGC'] / team_players['games_equivalent']
        team_players['goals_conceded_per_game'] = team_players['season_GC'] / team_players['games_equivalent']
        
        # Add defensive contribution if available
        if 'season_defensive_contribution' in team_players.columns:
            team_players['def_contrib_per_game'] = team_players['season_defensive_contribution'] / team_players['games_equivalent']
        else:
            team_players['def_contrib_per_game'] = 0.0
        
        # Add attacking stats for defenders (to capture attacking wing-backs)
        team_players['goals_per_game'] = team_players['season_goals'] / team_players['games_equivalent']
        team_players['assists_per_game'] = team_players['season_assists'] / team_players['games_equivalent']
        team_players['xg_per_game'] = team_players['season_xG'] / team_players['games_equivalent']
        team_players['xa_per_game'] = team_players['season_xA'] / team_players['games_equivalent']
        
        position_filter = team_players['position_name'].isin(['Defender', 'Goalkeeper'])
        
        # Composite defender score with ATTACKING contribution for wing-backs
        # DEFENSIVE (50% weight): 25% CS, 20% xCS, 20% Def Contrib, 15% GC, 10% xGC
        defensive_component = (
            0.25 * team_players['clean_sheet_rate'] +
            0.20 * team_players['xcs_per_game'] +
            0.20 * (team_players['def_contrib_per_game'] / team_players['def_contrib_per_game'].max() if team_players['def_contrib_per_game'].max() > 0 else 0) +
            0.15 / (team_players['goals_conceded_per_game'] + 0.1) +
            0.10 / (team_players['xgc_per_game'] + 0.1)
        )
        
        # ATTACKING (20% weight): Goals, Assists, xG, xA for attacking defenders
        attacking_component = (
            0.40 * team_players['goals_per_game'] +
            0.35 * team_players['assists_per_game'] +
            0.15 * team_players['xg_per_game'] +
            0.10 * team_players['xa_per_game']
        )
        
        # Combined: 50% defensive + 20% attacking + 20% form + 10% consistency
        team_players['defender_score'] = (
            0.50 * defensive_component +
            0.20 * attacking_component +
            0.20 * team_players['form'] +
            0.10 * team_players['consistency_score']
        )
        
        sort_columns = ['defender_score', 'points_per_game']
        display_cols = [
            'web_name', 'position_name', 'now_cost', 'clean_sheet_rate', 'xcs_per_game',
            'goals_per_game', 'assists_per_game', 'goals_conceded_per_game', 'def_contrib_per_game',
            'points_per_game', 'points_per_million', 'consistency_score', 'selected_by_percent',
            'team_name_short', 'form', 'defender_score'
        ]
    else:
        return pd.DataFrame()
    
    filtered_players = team_players[position_filter]
    if filtered_players.empty:
        return pd.DataFrame()
    
    for col in sort_columns:
        if col not in filtered_players.columns:
            filtered_players[col] = 0.0
    
    result = filtered_players.sort_values(by=sort_columns, ascending=False).head(n)[display_cols]
    return result.round(3)

# SHOW ALL TEAMS: Complete attacking rankings with player recommendations
print(f"\n‚öΩ ATTACKING PICKS FROM ALL TEAMS (Sorted by Attack Rank):")
print("=" * 60)
all_attacking_teams = team_rankings.sort_values('attack_rank').head(20)  # Limit to top 20 teams

for idx, (team, data) in enumerate(all_attacking_teams.iterrows()):
    if team in season_stats['team_name'].values:
        attack_rank = int(data['attack_rank'])
        attack_strength = data['attack_strength']
        overall_strength = data['overall_strength']
        
        attackers = get_players_for_matchup(team, 'weak_defense', season_stats, team_rankings, 4)
        if not attackers.empty:
            print(f"\nüî¥ {team} (#{attack_rank} Attack, Strength: {attack_strength:.3f}, Overall: {overall_strength:.3f}):")
            print(attackers.to_string(index=False))
            
            # Collect for JSON
            team_data = {
                'team': team,
                'attack_rank': attack_rank,
                'attack_strength': attack_strength,
                'overall_strength': overall_strength,
                'players': attackers.to_dict(orient='records')
            }
            attacking_picks.append(team_data)
        else:
            print(f"\nüî¥ {team} (#{attack_rank} Attack, Strength: {attack_strength:.3f}, Overall: {overall_strength:.3f}): No attacking players found")

# SHOW ALL TEAMS: Complete defensive rankings with player recommendations  
print(f"\nüõ°Ô∏è DEFENSIVE PICKS FROM ALL TEAMS (Sorted by Defense Rank):")
print("=" * 60)

all_defensive_teams = team_rankings.sort_values('defense_rank').head(20)  # Limit to top 20 teams

for idx, (team, data) in enumerate(all_defensive_teams.iterrows()):
    if team in season_stats['team_name'].values:
        defense_rank = int(data['defense_rank'])
        defense_strength = data['defense_strength']
        overall_strength = data['overall_strength']
        
        defenders = get_players_for_matchup(team, 'weak_attack', season_stats, team_rankings, 4)
        if not defenders.empty:
            print(f"\nüîµ {team} (#{defense_rank} Defense, Strength: {defense_strength:.3f}, Overall: {overall_strength:.3f}):")
            print(defenders.to_string(index=False))
            
            # Collect for JSON
            team_data = {
                'team': team,
                'defense_rank': defense_rank,
                'defense_strength': defense_strength,
                'overall_strength': overall_strength,
                'players': defenders.to_dict(orient='records')
            }
            defensive_picks.append(team_data)
        else:
            print(f"\nüîµ {team} (#{defense_rank} Defense, Strength: {defense_strength:.3f}, Overall: {overall_strength:.3f}): No defensive players found")

# Debugging: Print number of teams
print(f"\nProcessed {len(attacking_picks)} attacking teams")
print(f"Processed {len(defensive_picks)} defensive teams")

# Export to JSON
os.makedirs('backend/data/quick_picks', exist_ok=True)

with open('backend/data/quick_picks/attackingpicks.json', 'w', encoding='utf-8') as f:
    json.dump(attacking_picks, f, indent=4, ensure_ascii=False)

with open('backend/data/quick_picks/defensivepicks.json', 'w', encoding='utf-8') as f:
    json.dump(defensive_picks, f, indent=4, ensure_ascii=False)

print("Exported defensive picks to backend/data/quick_picks/defensivepicks.json")
print("\nExported attacking picks to backend/data/quick_picks/attackingpicks.json")

# Team strength rankings calculated using form-weighted stats

# üîÆ Fixture Analyzer

In [None]:
team_rankings

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
import os

class EnhancedFixtureAnalyzer:
    """Advanced fixture analysis system for FPL planning"""
    
    def __init__(self, season_stats, team_rankings, fixtures_path='fixture_template.csv', home_away_df=None):
        """Initialize with your existing data"""
        self.season_stats = season_stats
        self.team_rankings = team_rankings
        self.home_away_df = home_away_df
        self.fixtures_df = pd.read_csv(fixtures_path)
        self.current_gw = season_stats['last_gameweek'].max()
        self.start_gw = self.current_gw + 1
        self._process_data()
        
    def _process_data(self):
        """Process the data and create team mappings"""
        self._map_team_names()
    
    def _map_team_names(self):
        """Map fixture team names to season_stats team names"""
        fixture_teams = set(self.fixtures_df['home_team'].unique()) | set(self.fixtures_df['away_team'].unique())
        season_teams = set(self.season_stats['team_name'].unique())
        
        manual_mappings = {
            'Tottenham': 'Spurs',
            'Tottenham Hotspur': 'Spurs',
            'Nottm Forest': "Nott'm Forest",
            'Nottingham Forest': "Nott'm Forest",
            'Man Utd': 'Man Utd',
            'Manchester United': 'Man Utd',
            'Man City': 'Man City',
            'Manchester City': 'Man City',
            'Newcastle': 'Newcastle',
            'Newcastle United': 'Newcastle'
        }
        
        self.team_mapping = {}
        
        for fixture_team in fixture_teams:
            if fixture_team in manual_mappings:
                mapped_name = manual_mappings[fixture_team]
                if mapped_name in season_teams:
                    self.team_mapping[fixture_team] = mapped_name
                    continue
            
            if fixture_team in season_teams:
                self.team_mapping[fixture_team] = fixture_team
                continue
            
            best_match = None
            for season_team in season_teams:
                if (fixture_team.lower().replace(' ', '') in season_team.lower().replace(' ', '') or
                    season_team.lower().replace(' ', '') in fixture_team.lower().replace(' ', '')):
                    best_match = season_team
                    break
            if best_match:
                self.team_mapping[fixture_team] = best_match
            else:
                self.team_mapping[fixture_team] = fixture_team
                print(f"‚ö†Ô∏è Could not match '{fixture_team}' - using default mapping")
                
    def get_fixture_difficulty_matrix(self, start_gw=None, end_gw=None, home_advantage=0):
        """Create fixture difficulty matrix using actual strength scores (not ranks)
        
        üîß METHODOLOGY:
        - Attack & Defense calculated from different components (goals vs CS/GC)
        - Cannot directly compare raw values (different scales/units)
        - Use Z-SCORE STANDARDIZATION: (value - mean) / std
        - Puts both on same statistical scale (standard deviations from mean)
        - Enables valid comparison: "Elite attack (+2 SD) vs weak defense (-1.5 SD)"
        """
        if start_gw is None:
            start_gw = self.fixtures_df['gameweek'].min()
        if end_gw is None:
            end_gw = self.fixtures_df['gameweek'].max()
            
        fixtures_period = self.fixtures_df[
            (self.fixtures_df['gameweek'] >= start_gw) & 
            (self.fixtures_df['gameweek'] <= end_gw)
        ].copy()
        
        difficulties = []
        
        # üìä Z-SCORE STANDARDIZATION (not min-max normalization)
        # Attack and defense have different components/scales
        # Z-scores put both on same statistical scale for valid comparison
        attack_mean = self.team_rankings['attack_strength'].mean()
        attack_std = self.team_rankings['attack_strength'].std()
        defense_mean = self.team_rankings['defense_strength'].mean()
        defense_std = self.team_rankings['defense_strength'].std()
        
        # Prevent division by zero
        if attack_std == 0:
            attack_std = 1
        if defense_std == 0:
            defense_std = 1
        
        # Also calculate min/max for home/away boost calculations
        max_attack = self.team_rankings['attack_strength'].max()
        min_attack = self.team_rankings['attack_strength'].min()
        max_defense = self.team_rankings['defense_strength'].max()
        min_defense = self.team_rankings['defense_strength'].min()
        
        attack_range = max_attack - min_attack if max_attack != min_attack else 1
        defense_range = max_defense - min_defense if max_defense != min_defense else 1
        
        for _, fixture in fixtures_period.iterrows():
            home_team = self.team_mapping.get(fixture['home_team'], fixture['home_team'])
            away_team = self.team_mapping.get(fixture['away_team'], fixture['away_team'])
            
            if home_team in self.team_rankings.index and away_team in self.team_rankings.index:
                home_stats = self.team_rankings.loc[home_team]
                away_stats = self.team_rankings.loc[away_team]
                
                # Get base strength scores
                home_attack_strength = float(home_stats['attack_strength'])
                home_defense_strength = float(home_stats['defense_strength'])
                away_attack_strength = float(away_stats['attack_strength'])
                away_defense_strength = float(away_stats['defense_strength'])
                
                # Store original ranks for display
                home_attack_rank = int(home_stats['attack_rank'])
                away_defense_rank = int(away_stats['defense_rank'])
                away_attack_rank = int(away_stats['attack_rank'])
                home_defense_rank = int(home_stats['defense_rank'])
                
                # üè† Apply HOME advantage: boost home team's strengths
                if self.home_away_df is not None and home_team in self.home_away_df.index:
                    attack_boost = self.home_away_df.loc[home_team, 'attack_rank_boost']
                    defense_boost = self.home_away_df.loc[home_team, 'defense_rank_boost']
                    # Boost proportional to the strength range
                    home_attack_strength += attack_boost * (attack_range / len(self.team_rankings))
                    home_defense_strength += defense_boost * (defense_range / len(self.team_rankings))
                
                # üöÄ Apply AWAY advantage: boost away team's strengths if they're strong away
                if self.home_away_df is not None and away_team in self.home_away_df.index:
                    away_attack_boost = self.home_away_df.loc[away_team, 'away_attack_rank_boost']
                    away_defense_boost = self.home_away_df.loc[away_team, 'away_defense_rank_boost']
                    away_attack_strength += away_attack_boost * (attack_range / len(self.team_rankings))
                    away_defense_strength += away_defense_boost * (defense_range / len(self.team_rankings))
                
                # üîß Z-SCORE STANDARDIZATION (critical for comparing different scales!)
                # Attack: goals/assists/shots (range ~1-3)
                # Defense: CS rates/inverted GC/normalized actions (range ~0.3-1.0)
                # Z-scores convert both to "standard deviations from mean"
                # Example: +2.0 = 2 SDs above average (elite), -1.5 = 1.5 SDs below (weak)
                home_attack_zscore = (home_attack_strength - attack_mean) / attack_std
                away_defense_zscore = (away_defense_strength - defense_mean) / defense_std
                home_defense_zscore = (home_defense_strength - defense_mean) / defense_std
                away_attack_zscore = (away_attack_strength - attack_mean) / attack_std
                
                # ATTACK THREAT: Z-scored home attack vs Z-scored away defense
                # Both on same scale (standard deviations), comparison is now VALID
                # Positive = Home attack stronger than away defense (favorable)
                # Typical range: -4 to +4 (extreme mismatch to extreme mismatch)
                attack_threat = home_attack_zscore - away_defense_zscore
                
                # DEFENSE STABILITY: Z-scored home defense vs Z-scored away attack
                # Positive = Home defense stronger than away attack (favorable)
                defense_stability = home_defense_zscore - away_attack_zscore
                
                # Convert to 0-10 scale
                # Z-score differences typically range -4 to +4
                # Map: +4 = 10 (very easy), 0 = 5 (neutral), -4 = 0 (very hard)
                attack_difficulty = ((attack_threat + 4) / 8) * 10
                defense_difficulty = ((defense_stability + 4) / 8) * 10
                
                # Clamp to 0-10 range (in case of extreme outliers beyond ¬±4 SD)
                attack_difficulty = max(0, min(10, attack_difficulty))
                defense_difficulty = max(0, min(10, defense_difficulty))
                
                difficulties.append({
                    'gameweek': fixture['gameweek'],
                    'home_team': fixture['home_team'],
                    'away_team': fixture['away_team'],
                    'mapped_home': home_team,
                    'mapped_away': away_team,
                    'attack_difficulty': attack_difficulty,
                    'defense_difficulty': defense_difficulty,
                    'overall_difficulty': (attack_difficulty + defense_difficulty) / 2,
                    'home_attack_rank': home_attack_rank,
                    'away_defense_rank': away_defense_rank,
                    'home_defense_rank': home_defense_rank,
                    'away_attack_rank': away_attack_rank,
                    'attack_strength_diff': attack_threat,
                    'defense_strength_diff': defense_stability
                })
        
        return pd.DataFrame(difficulties)

    def export_fixture_data(self, num_gameweeks=6):
        """
        Export fixture data and team summaries to JSON for front-end
        
        FDR Calculation:
        - Based on attacking_fixture_rating and defensive_fixture_rating percentages
        - Higher percentage = easier fixture = lower FDR (1-10 scale)
        - Example: 85% attacking rating ‚Üí FDR 2.0 (very easy)
        - Example: 15% attacking rating ‚Üí FDR 9.0 (very hard)
        
        Home/Away Advantage System:
        - HOME team gets boost if they perform better at home (rank improves)
        - AWAY team gets boost if they perform better away (rank improves)
        - Both boosts applied simultaneously, creating balanced fixture difficulty
        - Example: Liverpool (strong home) vs Man City (strong away) = reduced net advantage
        
        Home Advantage Score:
        - Sum of attack_difficulty + defense_difficulty scores
        - Positive value = favorable matchup for home team
        - Negative value = unfavorable matchup for home team
        - Zero = neutral matchup
        - Calculation: (opponent_defense_rank - home_attack_rank) + (opponent_attack_rank - home_defense_rank)
        """
        
        def score_to_attacking_probability(difficulty_score):
            """
            Convert 0-10 difficulty score to attacking probability percentage
            Score 10 = Very favorable (90%)
            Score 5 = Neutral (50%)
            Score 0 = Very unfavorable (10%)
            
            BALANCED SCALE: Uses same 10-90% range as defense for fair comparison
            """
            # Linear conversion: 0-10 score to 10-90% probability
            probability = 10 + (difficulty_score * 8.0)
            return round(max(10, min(90, probability)), 1)
        
        def score_to_defensive_probability(difficulty_score):
            """
            Convert 0-10 difficulty score to defensive probability percentage
            Score 10 = Very favorable for clean sheet (90%)
            Score 5 = Neutral (50%)
            Score 0 = Very unfavorable (10%)
            
            BALANCED SCALE: Uses same 10-90% range as attack for fair comparison
            """
            # Linear conversion: 0-10 score to 10-90% probability
            probability = 10 + (difficulty_score * 8.0)
            return round(max(10, min(90, probability)), 1)
        
        def percentage_to_fdr(percentage):
            """
            Convert percentage rating to FDR (1-10 scale, lower = easier)
            - High percentage (85%) = Easy fixture = Low FDR (1-2)
            - Low percentage (15%) = Hard fixture = High FDR (8-9)
            - Neutral (50%) = Medium FDR (5)
            """
            fdr = 10.5 - (percentage / 10)
            return max(1, min(10, round(fdr, 1)))
        
        def get_team_info(team):
            mapped_team = self.team_mapping.get(team, team)
            team_data = self.season_stats[self.season_stats['team_name'] == mapped_team]
            team_short = team_data['team_name_short'].iloc[0] if 'team_name_short' in team_data.columns and not team_data.empty else team
            
            if mapped_team in self.team_rankings.index:
                ranks = self.team_rankings.loc[mapped_team]
                return team_short, int(ranks['attack_rank']), int(ranks['defense_rank'])
            return team_short, None, None
        
        # 1. Fixtures Data
        start_gw = self.start_gw
        max_export_gw = min(start_gw + 10, 38)
        difficulty_matrix = self.get_fixture_difficulty_matrix(start_gw=start_gw, end_gw=max_export_gw)
        fixtures_data = []
        
        # Get normalization ranges (same as in get_fixture_difficulty_matrix)
        max_attack = self.team_rankings['attack_strength'].max()
        min_attack = self.team_rankings['attack_strength'].min()
        max_defense = self.team_rankings['defense_strength'].max()
        min_defense = self.team_rankings['defense_strength'].min()
        
        max_possible_attack_diff = max_attack - min_defense
        min_possible_attack_diff = min_attack - max_defense
        max_possible_defense_diff = max_defense - min_attack
        min_possible_defense_diff = min_defense - max_attack
        strength_per_rank = (max_attack - min_attack) / len(self.team_rankings)
        
        for _, fixture in difficulty_matrix.iterrows():
            home_team = fixture['home_team']
            away_team = fixture['away_team']
            gw = int(fixture['gameweek'])
            mapped_home = fixture['mapped_home']
            mapped_away = fixture['mapped_away']
            
            home_att_score = round(fixture['attack_difficulty'], 1)
            home_def_score = round(fixture['defense_difficulty'], 1)
            
            home_short, home_att_rank, home_def_rank = get_team_info(home_team)
            away_short, away_att_rank, away_def_rank = get_team_info(away_team)
            
            away_att_score = 0.0
            away_def_score = 0.0
            if mapped_away in self.team_rankings.index and mapped_home in self.team_rankings.index:
                away_stats = self.team_rankings.loc[mapped_away]
                home_stats = self.team_rankings.loc[mapped_home]
                
                # Get base strength scores
                away_attack_strength = float(away_stats['attack_strength'])
                away_defense_strength = float(away_stats['defense_strength'])
                home_attack_strength = float(home_stats['attack_strength'])
                home_defense_strength = float(home_stats['defense_strength'])
                
                # Get z-score parameters (same as in get_fixture_difficulty_matrix)
                attack_mean = self.team_rankings['attack_strength'].mean()
                attack_std = self.team_rankings['attack_strength'].std()
                defense_mean = self.team_rankings['defense_strength'].mean()
                defense_std = self.team_rankings['defense_strength'].std()
                
                if attack_std == 0:
                    attack_std = 1
                if defense_std == 0:
                    defense_std = 1
                
                attack_range = max_attack - min_attack if max_attack != min_attack else 1
                defense_range = max_defense - min_defense if max_defense != min_defense else 1
                
                # üöÄ Apply AWAY team's away advantage
                if self.home_away_df is not None and mapped_away in self.home_away_df.index:
                    away_attack_boost = self.home_away_df.loc[mapped_away, 'away_attack_rank_boost']
                    away_defense_boost = self.home_away_df.loc[mapped_away, 'away_defense_rank_boost']
                    away_attack_strength += away_attack_boost * (attack_range / len(self.team_rankings))
                    away_defense_strength += away_defense_boost * (defense_range / len(self.team_rankings))
                
                # üè† Apply HOME team's home advantage
                if self.home_away_df is not None and mapped_home in self.home_away_df.index:
                    home_attack_boost = self.home_away_df.loc[mapped_home, 'attack_rank_boost']
                    home_defense_boost = self.home_away_df.loc[mapped_home, 'defense_rank_boost']
                    home_attack_strength += home_attack_boost * (attack_range / len(self.team_rankings))
                    home_defense_strength += home_defense_boost * (defense_range / len(self.team_rankings))
                
                # üîß Z-SCORE STANDARDIZATION (same as home team calculation)
                away_attack_zscore = (away_attack_strength - attack_mean) / attack_std
                home_defense_zscore = (home_defense_strength - defense_mean) / defense_std
                away_defense_zscore = (away_defense_strength - defense_mean) / defense_std
                home_attack_zscore = (home_attack_strength - attack_mean) / attack_std
                
                # AWAY ATTACK THREAT: Away attack vs Home defense (z-scores)
                away_attack_threat = away_attack_zscore - home_defense_zscore
                
                # Convert z-score difference to 0-10 scale (¬±4 SD range)
                away_att_score = ((away_attack_threat + 4) / 8) * 10
                away_att_score = round(max(0, min(10, away_att_score)), 1)
                
                # AWAY DEFENSE STABILITY: Away defense vs Home attack (z-scores)
                away_defense_stability = away_defense_zscore - home_attack_zscore
                
                # Convert z-score difference to 0-10 scale (¬±4 SD range)
                away_def_score = ((away_defense_stability + 4) / 8) * 10
                away_def_score = round(max(0, min(10, away_def_score)), 1)
            
            home_attack_pct = score_to_attacking_probability(home_att_score)
            home_defense_pct = score_to_defensive_probability(home_def_score)
            away_attack_pct = score_to_attacking_probability(away_att_score)
            away_defense_pct = score_to_defensive_probability(away_def_score)
            
            # Convert percentages to FDR ratings
            home_attack_fdr = percentage_to_fdr(home_attack_pct)
            home_defense_fdr = percentage_to_fdr(home_defense_pct)
            away_attack_fdr = percentage_to_fdr(away_attack_pct)
            away_defense_fdr = percentage_to_fdr(away_defense_pct)
            
            home_overall_fdr = round((home_attack_fdr + home_defense_fdr) / 2, 1)
            away_overall_fdr = round((away_attack_fdr + away_defense_fdr) / 2, 1)
            
            # Home advantage score: positive = favorable, negative = unfavorable
            # Based on combined attack + defense difficulty scores
            home_advantage_score = round(home_att_score + home_def_score, 1)
            
            fixture_data = {
                'gameweek': gw,
                'fixture': f"{home_team} vs {away_team}",
                'home_team': {
                    'name': home_team,
                    'short_name': home_short,
                    'attacking_fixture_rating': home_attack_pct,
                    'defensive_fixture_rating': home_defense_pct,
                    'rank': {
                        'attack': home_att_rank,
                        'defense': home_def_rank
                    },
                    'fdr': {
                        'attack': home_attack_fdr,
                        'defense': home_defense_fdr,
                        'overall': home_overall_fdr
                    }
                },
                'away_team': {
                    'name': away_team,
                    'short_name': away_short,
                    'attacking_fixture_rating': away_attack_pct,
                    'defensive_fixture_rating': away_defense_pct,
                    'rank': {
                        'attack': away_att_rank,
                        'defense': away_def_rank
                    },
                    'fdr': {
                        'attack': away_attack_fdr,
                        'defense': away_defense_fdr,
                        'overall': away_overall_fdr
                    }
                }
            }
            fixtures_data.append(fixture_data)
        
        # 2. Team Fixture Summary with ALL original fields
        end_gw = start_gw + num_gameweeks - 1
        all_difficulties = self.get_fixture_difficulty_matrix(start_gw, end_gw)
        
        if all_difficulties.empty:
            print("‚ùå No fixture difficulty data available")
            return []

        team_summary = []
        fixture_teams = set(all_difficulties['home_team'].unique()) | set(all_difficulties['away_team'].unique())

        for team in fixture_teams:
            team_fixtures = all_difficulties[(all_difficulties['home_team'] == team) | (all_difficulties['away_team'] == team)].copy()
            if len(team_fixtures) == 0:
                continue
            
            # Sort by gameweek to ensure proper period calculation
            team_fixtures = team_fixtures.sort_values('gameweek')

            # Track all fixtures and split into periods
            attack_scores = []
            defense_scores = []
            all_home_fixtures = 0
            near_term_home_fixtures = 0
            medium_term_home_fixtures = 0
            
            near_term_fixtures = []  # GW 1-3
            medium_term_fixtures = []  # GW 4-6
            fixture_count = 0
            
            for _, fixture in team_fixtures.iterrows():
                is_home = fixture['home_team'] == team
                
                if is_home:
                    attack_diff = fixture['attack_difficulty']
                    defense_diff = fixture['defense_difficulty']
                    all_home_fixtures += 1
                    
                    # Count home fixtures by period
                    if fixture_count < 3:
                        near_term_home_fixtures += 1
                    elif fixture_count < 6:
                        medium_term_home_fixtures += 1
                else:
                    mapped_away = fixture['mapped_away']
                    mapped_home = fixture['mapped_home']
                    
                    if mapped_away in self.team_rankings.index and mapped_home in self.team_rankings.index:
                        away_stats = self.team_rankings.loc[mapped_away]
                        home_stats = self.team_rankings.loc[mapped_home]
                        
                        # Get base strength scores
                        away_attack_strength = float(away_stats['attack_strength'])
                        away_defense_strength = float(away_stats['defense_strength'])
                        home_attack_strength = float(home_stats['attack_strength'])
                        home_defense_strength = float(home_stats['defense_strength'])
                        
                        # Apply home/away advantages
                        if self.home_away_df is not None:
                            if mapped_away in self.home_away_df.index:
                                away_attack_boost = self.home_away_df.loc[mapped_away, 'away_attack_rank_boost']
                                away_defense_boost = self.home_away_df.loc[mapped_away, 'away_defense_rank_boost']
                                away_attack_strength += away_attack_boost * 0.015
                                away_defense_strength += away_defense_boost * 0.015
                            
                            if mapped_home in self.home_away_df.index:
                                home_attack_boost = self.home_away_df.loc[mapped_home, 'attack_rank_boost']
                                home_defense_boost = self.home_away_df.loc[mapped_home, 'defense_rank_boost']
                                home_attack_strength += home_attack_boost * 0.015
                                home_defense_strength += home_defense_boost * 0.015
                        
                        # Calculate away team's perspective
                        away_attack_threat = away_attack_strength - home_defense_strength
                        attack_diff = (away_attack_threat + 0.5) * 10
                        attack_diff = max(0, min(10, attack_diff))
                        
                        away_defense_stability = away_defense_strength - home_attack_strength
                        defense_diff = (away_defense_stability + 0.5) * 10
                        defense_diff = max(0, min(10, defense_diff))
                    else:
                        continue

                attack_scores.append(attack_diff)
                defense_scores.append(defense_diff)
                
                # Split fixtures by period
                if fixture_count < 3:  # Near-term: GW 1-3
                    near_term_fixtures.append({
                        'attack': score_to_attacking_probability(attack_diff),
                        'defense': score_to_defensive_probability(defense_diff),
                        'attack_diff': attack_diff,
                        'defense_diff': defense_diff
                    })
                elif fixture_count < 6:  # Medium-term: GW 4-6
                    medium_term_fixtures.append({
                        'attack': score_to_attacking_probability(attack_diff),
                        'defense': score_to_defensive_probability(defense_diff),
                        'attack_diff': attack_diff,
                        'defense_diff': defense_diff
                    })
                
                fixture_count += 1

            avg_attack_diff = round(np.mean(attack_scores), 3) if attack_scores else 0
            avg_defense_diff = round(np.mean(defense_scores), 3) if defense_scores else 0
            overall_diff = round((avg_attack_diff + avg_defense_diff) / 2, 3)
            
            # Near-term rating (GW 1-3 average)
            near_term_rating = 0
            if near_term_fixtures:
                near_term_scores = [(f['attack'] + f['defense']) / 2 for f in near_term_fixtures]
                near_term_rating = round(np.mean(near_term_scores), 1)
            
            # Medium-term rating (GW 4-6 average)
            medium_term_rating = 0
            if medium_term_fixtures:
                medium_term_scores = [(f['attack'] + f['defense']) / 2 for f in medium_term_fixtures]
                medium_term_rating = round(np.mean(medium_term_scores), 1)
            
            # Fixture swing (how much fixtures improve/worsen from near to medium term)
            fixture_swing = round(medium_term_rating - near_term_rating, 1)
            
            # Swing category
            if fixture_swing > 10:
                swing_category = "Improving Fixtures"
            elif fixture_swing < -10:
                swing_category = "Worsening Fixtures"
            else:
                swing_category = "Stable Fixtures"
            
            # Form context
            form_context = "consistent"
            if abs(fixture_swing) > 15:
                form_context = "volatile"

            team_summary.append({
                'team': team,
                'avg_attack_difficulty': avg_attack_diff,
                'avg_defense_difficulty': avg_defense_diff,
                'overall_difficulty': overall_diff,
                'near_term_home_fixtures': near_term_home_fixtures,
                'medium_term_home_fixtures': medium_term_home_fixtures,
                'near_term_rating': near_term_rating,
                'medium_term_rating': medium_term_rating,
                'fixture_swing': fixture_swing,
                'swing_category': swing_category,
                'form_context': form_context
            })
        
        # Write JSON files
        with open('backend/data/fixture_analysis/fixtures.json', 'w') as f:
            json.dump(fixtures_data, f, indent=2)
        print("‚úÖ Exported fixtures to backend/data/fixture_analysis/fixtures.json")
        
        with open('backend/data/fixture_analysis/team_fixture_summary.json', 'w') as f:
            json.dump(team_summary, f, indent=2)
        print("‚úÖ Exported team fixture summary to backend/data/fixture_analysis/team_fixture_summary.json")

# Initialization block
print("üîÆ INITIALIZING ENHANCED FIXTURE ANALYZER...")
print("=" * 60)
try:
    analyzer = EnhancedFixtureAnalyzer(season_stats, team_rankings, 'fixture_template.csv', home_away_df=home_away_df)
    print("‚úÖ Analyzer initialized successfully!")
    print(f"üìä Fixture data loaded: {len(analyzer.fixtures_df)} fixtures")
    print(f"üè† Using dynamic home advantage for {len(home_away_df)} teams")
    
    missing_mappings = [team for team, mapped in analyzer.team_mapping.items() 
                       if mapped not in analyzer.team_rankings.index and mapped == team]
    
    if missing_mappings:
        print(f"‚ö†Ô∏è Teams without ranking data: {', '.join(missing_mappings[:5])}")
        print("   (These teams will be skipped in analysis)")
    else:
        print("‚úÖ All teams successfully mapped to ranking data")
    
    print("\nüéØ ENHANCED FIXTURE ANALYZER READY!")
    
except Exception as e:
    print(f"‚ùå Error initializing analyzer: {e}")
    print("Please check that 'fixture_template.csv' exists and has the correct format")
    import traceback
    traceback.print_exc()

# Export fixture data to JSON
if 'analyzer' in locals():
    print("\n" + "="*70)
    print("üì§ EXPORTING FIXTURE DATA TO JSON")
    print("="*70)
    analyzer.export_fixture_data()

# ‚úÖ Feature Validation

In [None]:
import os
import json

print("="*80)
print("‚úÖ FEATURE VALIDATION & FLOW CHECK")
print("="*80)

# Check 1: Form-Weighted Rankings
print("\n1Ô∏è‚É£ FORM-WEIGHTED TEAM RANKINGS")
print("-" * 80)
if 'team_rankings' in locals():
    print(f"‚úÖ Team rankings available: {len(team_rankings)} teams")
    print(f"   Columns: {', '.join(team_rankings.columns.tolist())}")
    print(f"   Attack Rank Range: {team_rankings['attack_rank'].min()}-{team_rankings['attack_rank'].max()}")
    print(f"   Defense Rank Range: {team_rankings['defense_rank'].min()}-{team_rankings['defense_rank'].max()}")
else:
    print("‚ùå Team rankings not found")

# Check 2: Dynamic Home Advantage
print("\n2Ô∏è‚É£ DYNAMIC HOME ADVANTAGE")
print("-" * 80)
if 'home_away_df' in locals():
    print(f"‚úÖ Home/away advantage data available: {len(home_away_df)} teams")
    
    # Convert to numeric if needed
    df_numeric = home_away_df.copy()
    df_numeric['attack_advantage_factor'] = pd.to_numeric(df_numeric['attack_advantage_factor'], errors='coerce')
    df_numeric['defense_advantage_factor'] = pd.to_numeric(df_numeric['defense_advantage_factor'], errors='coerce')
    
    atk_min = df_numeric['attack_advantage_factor'].min()
    atk_max = df_numeric['attack_advantage_factor'].max()
    def_min = df_numeric['defense_advantage_factor'].min()
    def_max = df_numeric['defense_advantage_factor'].max()
    
    print(f"   Attack Advantage Range: {atk_min:+.1%} to {atk_max:+.1%}")
    print(f"   Defense Advantage Range: {def_min:+.1%} to {def_max:+.1%}")
    
    # Show top home teams
    top_home = df_numeric.nlargest(3, 'attack_advantage_factor')
    print(f"\n   üèÜ Teams with Best Home Attack Performance:")
    for team, row in top_home.iterrows():
        print(f"      ‚Ä¢ {team}: {row['attack_advantage_factor']:+.1%} (Rank boost: {row['attack_rank_boost']:+.2f})")
else:
    print("‚ùå Home/away advantage data not found")

# Check 3: Fixture Analyzer
print("\n3Ô∏è‚É£ ENHANCED FIXTURE ANALYZER")
print("-" * 80)
if 'analyzer' in locals():
    print(f"‚úÖ Fixture analyzer initialized successfully")
    print(f"   Teams mapped: {len(analyzer.team_mapping)}")
    print(f"   Total fixtures: {len(analyzer.fixtures_df)}")
    print(f"   Gameweeks: {analyzer.fixtures_df['gameweek'].min()} to {analyzer.fixtures_df['gameweek'].max()}")
else:
    print("‚ùå Fixture analyzer not initialized")

# Check 4: JSON Exports
print("\n4Ô∏è‚É£ JSON EXPORTS")
print("-" * 80)
export_files = {
    'fixtures.json': 'backend/data/fixture_analysis/fixtures.json',
    'fixture_opportunities.json': 'backend/data/fixture_analysis/fixture_opportunities.json',
    'team_fixture_summary.json': 'backend/data/fixture_analysis/team_fixture_summary.json',
    'all_players.json': 'backend/data/player_trends/all_players.json',
    'player_data.json': 'backend/data/player_trends/player_data.json',
}

all_files_exist = True
for filename, filepath in export_files.items():
    if os.path.exists(filepath):
        size_kb = os.path.getsize(filepath) / 1024
        print(f"‚úÖ {filename:<35} ({size_kb:>7.1f} KB)")
    else:
        print(f"‚ùå {filename:<35} NOT FOUND")
        all_files_exist = False

# Check 5: Data Validation
print("\n5Ô∏è‚É£ DATA VALIDATION")
print("-" * 80)

# Sample fixture data to verify structure
if os.path.exists('backend/data/fixture_analysis/fixtures.json'):
    with open('backend/data/fixture_analysis/fixtures.json', 'r') as f:
        fixtures = json.load(f)
    
    if fixtures and len(fixtures) > 0:
        sample_fixture = fixtures[0]
        required_keys = ['gameweek', 'fixture', 'home_team', 'away_team']
        home_keys = ['name', 'short_name', 'attacking_fixture_rating', 'defensive_fixture_rating', 'rank']
        
        fixture_ok = all(key in sample_fixture for key in required_keys)
        home_ok = all(key in sample_fixture['home_team'] for key in home_keys)
        
        if fixture_ok and home_ok:
            print(f"‚úÖ Fixture JSON structure valid")
            print(f"   Total fixtures: {len(fixtures)}")
            print(f"   Sample fixture: {sample_fixture['home_team']['name']} vs {sample_fixture['away_team']['name']} (GW{sample_fixture['gameweek']})")
            print(f"   Home Attack Rating: {sample_fixture['home_team']['attacking_fixture_rating']}")
            print(f"   Away Defense Rating: {sample_fixture['away_team']['defensive_fixture_rating']}")
        else:
            print(f"‚ùå Fixture JSON structure invalid")
            print(f"   Missing keys detected")
else:
    print("‚ùå fixtures.json not found for validation")

# Final Summary
print("\n" + "="*80)
print("üéØ FEATURE FLOW SUMMARY")
print("="*80)

summary = {
    'Form-Weighted Rankings': 'team_rankings' in locals(),
    'Dynamic Home Advantage': 'home_away_df' in locals(),
    'Fixture Analyzer': 'analyzer' in locals(),
    'JSON Exports': all_files_exist,
}

completed = sum(1 for v in summary.values() if v)
total = len(summary)

for feature, status in summary.items():
    symbol = "‚úÖ" if status else "‚ùå"
    print(f"{symbol} {feature}")

print(f"\nüìä COMPLETION: {completed}/{total} features working")

if completed == total:
    print("\nüéâ ALL FEATURES INTEGRATED & FLOWING PROPERLY!")
    print("   ‚úÖ Form weighting applied to team rankings")
    print("   ‚úÖ Dynamic home advantage calculated")
    print("   ‚úÖ Fixture analyzer enhanced with both features")
    print("   ‚úÖ All 5 JSON files exported successfully")
    print("   ‚úÖ No duplicates, clean integration")
else:
    print(f"\n‚ö†Ô∏è  {total - completed} feature(s) need attention")

# üìä Player Trends Export

In [None]:
# Convert player data to JSON for faster API performance
import json
import os

def convert_players_to_json():
    """Convert player data from CSV to JSON format"""
    
    # Create player_trends directory
    output_dir = 'backend/data/player_trends'
    os.makedirs(output_dir, exist_ok=True)
    
    print("Converting player data to JSON...")
    
    # Load the CSV data
    df_players = pd.read_csv('fpl-data-stats.csv')
    
    # Fill NaN values first before type conversion
    df_players = df_players.fillna({
        'web_name': 'Unknown',
        'team_name': 'Unknown', 
        'opponent_team_name': 'Unknown',
        'was_home': False,
        'touches': 0,
        'penalty_area_touches': 0,
        'carries_final_third': 0,
        'key_passes': 0,
        'shots': 0,
        'SoT': 0,
        'G': 0,
        'A': 0,
        'CS': 0,
        'GC': 0,
        'minutes': 0,
        'total_points': 0,
        'now_cost': 0,
        'selected_by_percent': 0,
        'xG': 0,
        'xA': 0,
        'xGI': 0,
        'xP': 0,
        'xGC': 0,
        'defensive_contribution': 0
    })
    
    # Convert to native Python types to avoid JSON serialization issues
    df_players = df_players.astype({
        'id': 'int32',
        'element_type': 'int32', 
        'gameweek': 'int32',
        'minutes': 'int32',
        'total_points': 'float32',
        'G': 'int32',
        'A': 'int32',
        'CS': 'int32',
        'shots': 'int32',
        'SoT': 'int32',
        'key_passes': 'int32',
        'touches': 'int32',
        'penalty_area_touches': 'int32',
        'carries_final_third': 'int32',
        'GC': 'int32',
        'now_cost': 'float32',
        'selected_by_percent': 'float32',
        'xG': 'float32',
        'xA': 'float32',
        'xGI': 'float32',
        'xP': 'float32',
        'xGC': 'float32',
        'defensive_contribution': 'float32'
    })
    
    # Convert boolean columns
    df_players['was_home'] = df_players['was_home'].astype(bool)
    
    # Create all_players.json (list of unique players for search)
    latest_gw = df_players.groupby('id')['gameweek'].max()
    unique_players = df_players[df_players.apply(lambda row: row['gameweek'] == latest_gw[row['id']], axis=1)]
    
    players_list = []
    for _, row in unique_players.iterrows():
        players_list.append({
            "id": int(row['id']),
            "name": str(row['web_name']),
            "team": str(row['team_name']),
            "position": int(row['element_type']),
            "cost": round(float(row['now_cost']), 2),
            "ownership": round(float(row['selected_by_percent']), 2)
        })
    
    players_list.sort(key=lambda x: x['name'])
    
    # Save all_players.json
    with open(f'{output_dir}/all_players.json', 'w') as f:
        json.dump({
            "players": players_list,
            "count": len(players_list)
        }, f, indent=2)
    
    print(f"‚úÖ Saved {len(players_list)} players to all_players.json")
    
    # Create player_data.json (all gameweek data organized by player)
    player_data = {}
    
    for player_name in df_players['web_name'].dropna().unique():
        player_gw_data = df_players[df_players['web_name'] == player_name].copy()
        
        if player_gw_data.empty:
            continue
            
        # Sort by gameweek
        player_gw_data = player_gw_data.sort_values('gameweek')
        
        # Get player info from most recent gameweek
        player_info = player_gw_data.iloc[-1]
        
        # Calculate form (last 5 GWs)
        last_5_gws = player_gw_data.tail(5)
        form_stats = {
            "avg_points": round(float(last_5_gws['total_points'].mean()), 1),
            "avg_minutes": round(float(last_5_gws['minutes'].mean()), 0),
            "games_played": int(len(last_5_gws))
        }
        
        # Gameweek data
        gameweeks = []
        for _, row in player_gw_data.iterrows():
            gameweeks.append({
                "gameweek": int(row['gameweek']),
                "opponent": str(row['opponent_team_name']),
                "was_home": bool(row['was_home']),
                "total_points": float(row['total_points']),
                "minutes": int(row['minutes']),
                "goals": int(row['G']),
                "assists": int(row['A']),
                "clean_sheets": int(row['CS']),
                "xG": round(float(row['xG']), 2),
                "xA": round(float(row['xA']), 2),
                "xGI": round(float(row['xGI']), 2),
                "xP": round(float(row['xP']), 2),
                "shots": int(row['shots']),
                "shots_on_target": int(row['SoT']),
                "key_passes": int(row['key_passes']),
                "touches": int(row['touches']),
                "penalty_area_touches": int(row['penalty_area_touches']),
                "carries_final_third": int(row['carries_final_third']),
                "defensive_contribution": round(float(row['defensive_contribution']), 2),
                "xGC": round(float(row['xGC']), 2),
                "goals_conceded": int(row['GC'])
            })
        
        # Total stats
        total_minutes = int(player_gw_data['minutes'].sum())
        total_stats = {
            "games_played": int(len(player_gw_data)),
            "total_points": int(player_gw_data['total_points'].sum()),
            "total_goals": int(player_gw_data['G'].sum()),
            "total_assists": int(player_gw_data['A'].sum()),
            "total_xG": round(float(player_gw_data['xG'].sum()), 2),
            "total_xA": round(float(player_gw_data['xA'].sum()), 2),
            "total_xGI": round(float(player_gw_data['xGI'].sum()), 2),
            "total_xP": round(float(player_gw_data['xP'].sum()), 2),
            "total_minutes": total_minutes,
            "total_shots": int(player_gw_data['shots'].sum()),
            "total_key_passes": int(player_gw_data['key_passes'].sum())
        }
        
        # Per-90 stats
        per90_stats = {
            "points_per_90": round((total_stats["total_points"] * 90) / max(total_minutes, 1), 2),
            "goals_per_90": round((total_stats["total_goals"] * 90) / max(total_minutes, 1), 2),
            "assists_per_90": round((total_stats["total_assists"] * 90) / max(total_minutes, 1), 2),
            "xG_per_90": round((total_stats["total_xG"] * 90) / max(total_minutes, 1), 2),
            "xA_per_90": round((total_stats["total_xA"] * 90) / max(total_minutes, 1), 2),
            "xGI_per_90": round((total_stats["total_xGI"] * 90) / max(total_minutes, 1), 2),
            "shots_per_90": round((total_stats["total_shots"] * 90) / max(total_minutes, 1), 2),
            "key_passes_per_90": round((total_stats["total_key_passes"] * 90) / max(total_minutes, 1), 2)
        }
        
        # Store player data
        player_data[player_name] = {
            "player_name": str(player_name),
            "team": str(player_info['team_name']),
            "position": int(player_info['element_type']),
            "web_name": str(player_info['web_name']),
            "cost": round(float(player_info['now_cost']), 2),
            "ownership": round(float(player_info['selected_by_percent']), 2),
            "form": form_stats,
            "total_stats": total_stats,
            "per90_stats": per90_stats,
            "gameweeks": gameweeks
        }
    
    # Save player_data.json
    with open(f'{output_dir}/player_data.json', 'w') as f:
        json.dump(player_data, f, indent=2)
    
    print(f"‚úÖ Saved detailed data for {len(player_data)} players to player_data.json")
    print(f"üìÅ Files created in: {output_dir}/")
    
    return len(players_list), len(player_data)

# Run the conversion
player_count, detail_count = convert_players_to_json()
print(f"\nüéØ Conversion complete:")
print(f"   - {player_count} players in search index")  
print(f"   - {detail_count} players with detailed stats")

# üîÑ REDESIGNED SYSTEM: ELO-Based Rankings + Expected Points Fixture Analysis

**See `REDESIGNED_APPROACH.md` for full methodology**

This cell implements a simpler, more accurate system based on:
1. **ELO ratings** (like chess/FIFA) - self-correcting, proven
2. **Expected FPL points** - directly actionable
3. **No arbitrary weights** - statistically sound

In [41]:
import pandas as pd
import numpy as np
import json

# ============================================================================
# 1Ô∏è‚É£ ELO-BASED TEAM RATINGS
# ============================================================================

def calculate_team_elo_ratings(df: pd.DataFrame, k_factor=32, initial_rating=1500):
    """
    Calculate ELO-style ratings for teams based on match results.
    
    Why ELO?
    - Self-correcting: automatically adjusts based on results
    - Simple: one number per team
    - Recent results naturally matter more (latest updates persist)
    - Proven: used in chess, FIFA rankings, sports betting
    
    Args:
        df: DataFrame with gameweek data
        k_factor: Sensitivity to results (32=responsive, 16=stable, 64=volatile)
        initial_rating: Starting rating (1500 is standard)
    
    Returns:
        DataFrame with team ratings and ranks
    """
    
    # Initialize ratings for all teams
    teams = df['team_name'].unique()
    ratings = {team: {
        'attack_elo': initial_rating,
        'defense_elo': initial_rating,
        'games': 0
    } for team in teams}
    
    # Process each gameweek chronologically
    for gw in sorted(df['gameweek'].unique()):
        gw_data = df[df['gameweek'] == gw]
        
        # Get team-level match data (use defenders for CS/GC stats)
        matches = gw_data[gw_data['element_type'].isin([1, 2])].groupby(
            ['team_name', 'opponent_team_name', 'was_home']
        ).agg({
            'G': 'max',   # Goals scored (team-level)
            'GC': 'max'   # Goals conceded (team-level)
        }).reset_index()
        
        # Update ELO for each match
        for _, match in matches.iterrows():
            team = match['team_name']
            opponent = match['opponent_team_name']
            
            if opponent not in ratings:
                continue
            
            # Current ratings
            team_attack = ratings[team]['attack_elo']
            team_defense = ratings[team]['defense_elo']
            opp_attack = ratings[opponent]['attack_elo']
            opp_defense = ratings[opponent]['defense_elo']
            
            # Expected performance (0-1 scale using ELO formula)
            expected_attack = 1 / (1 + 10 ** ((opp_defense - team_attack) / 400))
            expected_defense = 1 / (1 + 10 ** ((opp_attack - team_defense) / 400))
            
            # Actual performance (normalized to 0-1)
            # Attack: 0 goals=0.0, 1 goal=0.33, 2 goals=0.67, 3+ goals=1.0
            actual_attack = min(match['G'] / 3.0, 1.0)
            # Defense: 0 conceded=1.0, 1=0.67, 2=0.33, 3+=0.0
            actual_defense = max(1.0 - (match['GC'] / 3.0), 0.0)
            
            # Home advantage (10% boost to expected performance)
            if match['was_home']:
                expected_attack *= 1.1
                expected_defense *= 1.1
                expected_attack = min(expected_attack, 1.0)
                expected_defense = min(expected_defense, 1.0)
            
            # Update ratings based on surprise factor
            ratings[team]['attack_elo'] += k_factor * (actual_attack - expected_attack)
            ratings[team]['defense_elo'] += k_factor * (actual_defense - expected_defense)
            ratings[team]['games'] += 1
    
    # Convert to DataFrame
    ratings_df = pd.DataFrame(ratings).T
    ratings_df.index.name = 'team'
    ratings_df = ratings_df.reset_index()
    
    # Overall ELO (60% attack, 40% defense - FPL weighted)
    ratings_df['overall_elo'] = (
        ratings_df['attack_elo'] * 0.6 + 
        ratings_df['defense_elo'] * 0.4
    )
    
    # Calculate ranks
    ratings_df['attack_rank'] = ratings_df['attack_elo'].rank(ascending=False, method='dense').astype(int)
    ratings_df['defense_rank'] = ratings_df['defense_elo'].rank(ascending=False, method='dense').astype(int)
    ratings_df['overall_rank'] = ratings_df['overall_elo'].rank(ascending=False, method='dense').astype(int)
    
    return ratings_df.sort_values('overall_rank')


# ============================================================================
# 2Ô∏è‚É£ EXPECTED POINTS FIXTURE DIFFICULTY
# ============================================================================

def calculate_expected_points_fixtures(
    team_ratings: pd.DataFrame,
    fixtures_df: pd.DataFrame,
    home_advantage=0.15  # 15% boost for home team
):
    """
    Calculate fixture difficulty using expected FPL points.
    
    Why Expected Points?
    - Directly actionable for FPL decisions
    - No arbitrary "difficulty" scales
    - Separates attacker vs defender value
    - Testable against actual FPL results
    
    Returns:
        DataFrame with FDR ratings and expected points for each fixture
    """
    
    fixtures_output = []
    
    for _, fixture in fixtures_df.iterrows():
        home_team = fixture['home_team']
        away_team = fixture['away_team']
        gw = fixture['gameweek']
        
        # Get team ratings
        home_data = team_ratings[team_ratings['team'] == home_team]
        away_data = team_ratings[team_ratings['team'] == away_team]
        
        if len(home_data) == 0 or len(away_data) == 0:
            continue
        
        home_data = home_data.iloc[0]
        away_data = away_data.iloc[0]
        
        # === HOME TEAM ===
        # Probability of scoring (attack vs opponent defense)
        elo_diff_attack_home = home_data['attack_elo'] - away_data['defense_elo']
        home_scoring_prob = 1 / (1 + 10 ** (-elo_diff_attack_home / 400))
        home_scoring_prob *= (1 + home_advantage)
        home_scoring_prob = min(home_scoring_prob, 0.95)
        
        # Probability of clean sheet (defense vs opponent attack)
        elo_diff_defense_home = home_data['defense_elo'] - away_data['attack_elo']
        home_cs_prob = 1 / (1 + 10 ** (-elo_diff_defense_home / 400))
        home_cs_prob *= (1 + home_advantage)
        home_cs_prob = min(home_cs_prob, 0.95)
        
        # === AWAY TEAM ===
        elo_diff_attack_away = away_data['attack_elo'] - home_data['defense_elo']
        away_scoring_prob = 1 / (1 + 10 ** (-elo_diff_attack_away / 400))
        away_scoring_prob = min(away_scoring_prob, 0.95)
        
        elo_diff_defense_away = away_data['defense_elo'] - home_data['attack_elo']
        away_cs_prob = 1 / (1 + 10 ** (-elo_diff_defense_away / 400))
        away_cs_prob = min(away_cs_prob, 0.95)
        
        # === EXPECTED FPL POINTS ===
        # Attacker: base 2pts + goal involvement (avg 5pts per goal)
        home_attacker_xpts = 2 + (home_scoring_prob * 5.0)
        away_attacker_xpts = 2 + (away_scoring_prob * 5.0)
        
        # Defender: base 2pts + CS (4pts) - conceded penalty (-0.5 per goal)
        home_defender_xpts = 2 + (home_cs_prob * 4) - ((1 - away_scoring_prob) * 0.5)
        away_defender_xpts = 2 + (away_cs_prob * 4) - ((1 - home_scoring_prob) * 0.5)
        
        # === FDR (1-10 scale, lower = easier) ===
        # High probability ‚Üí low FDR (easy)
        home_attack_fdr = 10 - (home_scoring_prob * 9)
        home_defense_fdr = 10 - (home_cs_prob * 9)
        away_attack_fdr = 10 - (away_scoring_prob * 9)
        away_defense_fdr = 10 - (away_cs_prob * 9)
        
        fixtures_output.append({
            'gameweek': gw,
            'home_team': home_team,
            'away_team': away_team,
            # Home team
            'home_attack_fdr': round(home_attack_fdr, 1),
            'home_defense_fdr': round(home_defense_fdr, 1),
            'home_overall_fdr': round((home_attack_fdr + home_defense_fdr) / 2, 1),
            'home_attacker_xpts': round(home_attacker_xpts, 2),
            'home_defender_xpts': round(home_defender_xpts, 2),
            'home_scoring_prob': round(home_scoring_prob, 3),
            'home_cs_prob': round(home_cs_prob, 3),
            # Away team
            'away_attack_fdr': round(away_attack_fdr, 1),
            'away_defense_fdr': round(away_defense_fdr, 1),
            'away_overall_fdr': round((away_attack_fdr + away_defense_fdr) / 2, 1),
            'away_attacker_xpts': round(away_attacker_xpts, 2),
            'away_defender_xpts': round(away_defender_xpts, 2),
            'away_scoring_prob': round(away_scoring_prob, 3),
            'away_cs_prob': round(away_cs_prob, 3)
        })
    
    return pd.DataFrame(fixtures_output)


# ============================================================================
# 3Ô∏è‚É£ FIXTURE SWING ANALYSIS
# ============================================================================

def calculate_fixture_swing(
    fixtures_df: pd.DataFrame, 
    team: str, 
    near_term_gw=3, 
    medium_term_gw=6
):
    """
    Calculate how fixture difficulty changes over time.
    Now uses expected FPL points instead of abstract ratings.
    
    Returns:
        Dict with near/medium term expected points and swing values
    """
    
    # Get team's fixtures
    team_fixtures = fixtures_df[
        (fixtures_df['home_team'] == team) | (fixtures_df['away_team'] == team)
    ].copy()
    
    # Determine home/away and extract relevant stats
    team_fixtures['is_home'] = team_fixtures['home_team'] == team
    team_fixtures['attacker_xpts'] = team_fixtures.apply(
        lambda row: row['home_attacker_xpts'] if row['is_home'] else row['away_attacker_xpts'],
        axis=1
    )
    team_fixtures['defender_xpts'] = team_fixtures.apply(
        lambda row: row['home_defender_xpts'] if row['is_home'] else row['away_defender_xpts'],
        axis=1
    )
    team_fixtures['attack_fdr'] = team_fixtures.apply(
        lambda row: row['home_attack_fdr'] if row['is_home'] else row['away_attack_fdr'],
        axis=1
    )
    team_fixtures['defense_fdr'] = team_fixtures.apply(
        lambda row: row['home_defense_fdr'] if row['is_home'] else row['away_defense_fdr'],
        axis=1
    )
    
    # Split into periods
    near_term = team_fixtures[team_fixtures['gameweek'] <= near_term_gw]
    medium_term = team_fixtures[
        (team_fixtures['gameweek'] > near_term_gw) & 
        (team_fixtures['gameweek'] <= medium_term_gw)
    ]
    
    # Calculate averages
    near_attacker_xpts = near_term['attacker_xpts'].mean() if len(near_term) > 0 else 0
    medium_attacker_xpts = medium_term['attacker_xpts'].mean() if len(medium_term) > 0 else 0
    near_defender_xpts = near_term['defender_xpts'].mean() if len(near_term) > 0 else 0
    medium_defender_xpts = medium_term['defender_xpts'].mean() if len(medium_term) > 0 else 0
    
    # Fixture swings
    attacker_swing = medium_attacker_xpts - near_attacker_xpts
    defender_swing = medium_defender_xpts - near_defender_xpts
    
    # Categorize
    if attacker_swing > 0.5 or defender_swing > 0.5:
        category = "Improving Fixtures"
    elif attacker_swing < -0.5 or defender_swing < -0.5:
        category = "Worsening Fixtures"
    else:
        category = "Stable Fixtures"
    
    return {
        'team': team,
        'near_term_attacker_xpts': round(near_attacker_xpts, 2),
        'medium_term_attacker_xpts': round(medium_attacker_xpts, 2),
        'attacker_fixture_swing': round(attacker_swing, 2),
        'near_term_defender_xpts': round(near_defender_xpts, 2),
        'medium_term_defender_xpts': round(medium_defender_xpts, 2),
        'defender_fixture_swing': round(defender_swing, 2),
        'swing_category': category,
        'near_term_avg_attack_fdr': round(near_term['attack_fdr'].mean(), 2) if len(near_term) > 0 else 0,
        'near_term_avg_defense_fdr': round(near_term['defense_fdr'].mean(), 2) if len(near_term) > 0 else 0
    }


# ============================================================================
# 4Ô∏è‚É£ RUN COMPLETE ANALYSIS
# ============================================================================

print("=" * 70)
print("üéØ REDESIGNED FPL ANALYSIS SYSTEM (ELO + Expected Points)")
print("=" * 70)

# Step 1: Calculate ELO ratings
print("\n1Ô∏è‚É£ Calculating ELO-based team ratings...")
elo_ratings = calculate_team_elo_ratings(df, k_factor=32, initial_rating=1500)

print(f"‚úÖ Ratings calculated for {len(elo_ratings)} teams\n")
print("üèÜ Top 10 Teams (Overall ELO):")
print(elo_ratings[['team', 'overall_rank', 'overall_elo', 'attack_elo', 'defense_elo', 'games']].head(10).to_string(index=False))

# Step 2: Calculate fixture difficulty
print("\n\n2Ô∏è‚É£ Calculating fixture difficulty (expected points)...")
fixtures_template = pd.read_csv('fixture_template.csv')
elo_fixtures = calculate_expected_points_fixtures(elo_ratings, fixtures_template, home_advantage=0.15)

print(f"‚úÖ Analyzed {len(elo_fixtures)} fixtures")
print("\nSample Fixtures (GW 1):")
sample = elo_fixtures[elo_fixtures['gameweek'] == 1].head(5)
print(sample[['gameweek', 'home_team', 'away_team', 'home_attacker_xpts', 'away_attacker_xpts', 'home_attack_fdr', 'away_attack_fdr']].to_string(index=False))

# Step 3: Calculate fixture swings
print("\n\n3Ô∏è‚É£ Calculating fixture swings...")
elo_swings = []
for team in elo_ratings['team'].unique():
    swing = calculate_fixture_swing(elo_fixtures, team, near_term_gw=3, medium_term_gw=6)
    elo_swings.append(swing)

elo_swings_df = pd.DataFrame(elo_swings)

print("‚úÖ Fixture swings calculated\n")
print("üìà Teams with IMPROVING Fixtures (Attackers):")
improving = elo_swings_df.nlargest(5, 'attacker_fixture_swing')
print(improving[['team', 'near_term_attacker_xpts', 'medium_term_attacker_xpts', 'attacker_fixture_swing']].to_string(index=False))

print("\n\nüìâ Teams with WORSENING Fixtures (Attackers):")
worsening = elo_swings_df.nsmallest(5, 'attacker_fixture_swing')
print(worsening[['team', 'near_term_attacker_xpts', 'medium_term_attacker_xpts', 'attacker_fixture_swing']].to_string(index=False))

# Step 4: Export to JSON
print("\n\n4Ô∏è‚É£ Exporting redesigned data...")

# Create output directory
import os
os.makedirs('backend/data/redesigned_analysis', exist_ok=True)

# Export ELO ratings
elo_ratings.to_json('backend/data/redesigned_analysis/elo_team_ratings.json', orient='records', indent=2)
print("‚úÖ Exported: elo_team_ratings.json")

# Export fixtures
elo_fixtures.to_json('backend/data/redesigned_analysis/elo_fixtures.json', orient='records', indent=2)
print("‚úÖ Exported: elo_fixtures.json")

# Export swings
elo_swings_df.to_json('backend/data/redesigned_analysis/elo_fixture_swings.json', orient='records', indent=2)
print("‚úÖ Exported: elo_fixture_swings.json")

print("\n" + "=" * 70)
print("‚úÖ REDESIGNED ANALYSIS COMPLETE!")
print("=" * 70)
print("\nüìä Compare with your current system to see which is more accurate!")
print("   Old system: backend/data/fixture_analysis/")
print("   New system: backend/data/redesigned_analysis/")
print("\nüí° Key advantages of new system:")
print("   ‚Ä¢ Simpler (ELO is well-understood)")
print("   ‚Ä¢ Self-correcting (recent results automatically weighted)")
print("   ‚Ä¢ FPL-focused (expected points, not abstract ratings)")
print("   ‚Ä¢ Testable (compare predictions vs actual FPL scores)")

üéØ REDESIGNED FPL ANALYSIS SYSTEM (ELO + Expected Points)

1Ô∏è‚É£ Calculating ELO-based team ratings...
‚úÖ Ratings calculated for 20 teams

üèÜ Top 10 Teams (Overall ELO):
          team  overall_rank  overall_elo  attack_elo  defense_elo  games
       Arsenal             1  1394.417439 1303.083728  1531.418006   19.0
      Man City             2  1387.445450 1305.636926  1510.158235   19.0
       Chelsea             3  1375.208681 1317.356645  1461.986736   19.0
    Sunderland             4  1372.216295 1294.181934  1489.267835   19.0
Crystal Palace             5  1369.138763 1305.653974  1464.365947   19.0
       Everton             6  1367.529091 1284.792217  1491.634402   19.0
      Brighton             7  1366.739687 1317.366756  1440.799084   19.0
         Spurs             8  1363.290833 1303.876984  1452.411607   19.0
   Aston Villa             9  1358.932191 1294.546602  1455.510574   19.0
        Fulham            10  1351.905562 1288.165807  1447.515195   19.0


2Ô∏è‚É£