# FPL Data API - Export Functions

Functions to export processed FPL data as JSON for frontend consumption

In [3]:
import json
from flask import Flask, jsonify, request
from flask_cors import CORS
import pandas as pd
import numpy as np

# Initialize Flask app
app = Flask(__name__)
CORS(app)  # Enable CORS for frontend requests

<flask_cors.extension.CORS at 0x28bb715a7b0>

In [10]:
# Data Export Functions - Updated with correct column names
def export_team_rankings():
    """Export team rankings data for frontend"""
    try:
        # Ensure we have the team_stats data
        if 'team_stats' not in globals():
            print("Error: team_stats not found. Please run the team analysis cells first.")
            return {}
        
        teams_data = []
        
        # Add ranking based on total points (descending order)
        team_stats_ranked = team_stats.copy()
        team_stats_ranked['overall_rank'] = team_stats_ranked['total_points_sum'].rank(method='dense', ascending=False)
        team_stats_ranked['attack_rank'] = team_stats_ranked['G_sum'].rank(method='dense', ascending=False)
        team_stats_ranked['defense_rank'] = team_stats_ranked['total_points_sum'].rank(method='dense', ascending=False)  # Simplified
        
        for team_name, row in team_stats_ranked.iterrows():
            team_data = {
                "name": team_name,
                "code": team_name[:3].upper(),  # First 3 letters as code
                "attackRank": int(row['attack_rank']),
                "defenseRank": int(row['defense_rank']),
                "goalsPerGame": float(row['G_sum']) / max(row['total_points_count'], 1) * 20,  # Approximate per game
                "xGPerGame": 2.0,  # Placeholder - would need xG data
                "cleanSheetPct": 40,  # Placeholder - would need CS data
                "goalsConceded": 1.2,  # Placeholder
                "attackScore": float(row['G_sum']) * 10,  # Scaled score
                "defenseScore": float(row['total_points_sum']) / 10,  # Scaled score
                "overallRank": int(row['overall_rank']),
                "totalPoints": int(row['total_points_sum']),
                "avgCost": float(row['now_cost_mean']),
                "avgOwnership": float(row['selected_by_percent_mean'])
            }
            teams_data.append(team_data)
        
        # Sort by total points descending
        teams_data.sort(key=lambda x: x['totalPoints'], reverse=True)
        
        return {"teams": teams_data}
    except Exception as e:
        print(f"Error exporting team rankings: {e}")
        return {"teams": []}

def export_attacking_picks():
    """Export attacking picks data by team strength"""
    try:
        if 'df' not in globals():
            print("Error: Main DataFrame not found. Please load the data first.")
            return {}
        
        # Get top attacking teams by goals scored
        top_attacking_teams = team_stats.nlargest(5, 'G_sum')
        
        attacking_data = []
        for team_name, team_row in top_attacking_teams.iterrows():
            # Get players from this team
            team_players = df[df['team_name'] == team_name].copy()
            
            # Focus on attacking players (MID, FWD)
            attacking_players = team_players[team_players['position_name'].isin(['Midfielder', 'Forward'])]
            
            # Get top 3 players by total points
            if len(attacking_players) > 0:
                top_players = attacking_players.nlargest(3, 'total_points')
                
                players_data = []
                for _, player in top_players.iterrows():
                    player_data = {
                        "name": player['web_name'],
                        "position": "MID" if player['position_name'] == 'Midfielder' else "FWD",
                        "price": float(player['now_cost']) / 10,  # Convert to millions
                        "goals_pg": float(player.get('G', 0)),  # Goals per appearance
                        "assists_pg": float(player.get('A', 0)),  # Assists per appearance
                        "points_pg": float(player.get('total_points', 0)) / max(player.get('gameweek', 1), 1),
                        "ownership": float(player.get('selected_by_percent', 0)),
                        "form": float(player.get('total_points', 0)) / 10,  # Simplified form
                        "total_points": int(player.get('total_points', 0))
                    }
                    players_data.append(player_data)
                
                team_data = {
                    "team": team_name,
                    "teamCode": team_name[:3].upper(),
                    "attackRank": len(attacking_data) + 1,  # Sequential rank
                    "attackStrength": float(team_row['G_sum']),
                    "difficulty": "easy" if team_row['G_sum'] > 10 else "moderate" if team_row['G_sum'] > 8 else "hard",
                    "players": players_data[:3]  # Top 3 players
                }
                attacking_data.append(team_data)
        
        return {"attackingPicks": attacking_data}
    except Exception as e:
        print(f"Error exporting attacking picks: {e}")
        return {"attackingPicks": []}

def export_defensive_picks():
    """Export defensive picks data by team strength"""
    try:
        if 'df' not in globals():
            print("Error: Main DataFrame not found. Please load the data first.")
            return {}
        
        # Get top defensive teams by total points (proxy for defensive strength)
        top_defensive_teams = team_stats.nlargest(4, 'total_points_sum')
        
        defensive_data = []
        for team_name, team_row in top_defensive_teams.iterrows():
            # Get players from this team
            team_players = df[df['team_name'] == team_name].copy()
            
            # Focus on defensive players (GK, DEF)
            defensive_players = team_players[team_players['position_name'].isin(['Goalkeeper', 'Defender'])]
            
            # Get top 3 players by total points
            if len(defensive_players) > 0:
                top_players = defensive_players.nlargest(3, 'total_points')
                
                players_data = []
                for _, player in top_players.iterrows():
                    # Calculate clean sheet rate (approximate using CS column if available)
                    cs_rate = float(player.get('CS', 0.3))  # Default if not available
                    
                    player_data = {
                        "name": player['web_name'],
                        "position": "GK" if player['position_name'] == 'Goalkeeper' else "DEF",
                        "price": float(player['now_cost']) / 10,
                        "cs_rate": cs_rate / 10 if cs_rate > 1 else cs_rate,  # Normalize
                        "points_pg": float(player.get('total_points', 0)) / max(player.get('gameweek', 1), 1),
                        "ownership": float(player.get('selected_by_percent', 0)),
                        "clean_sheets": int(player.get('CS', 0)),
                        "total_points": int(player.get('total_points', 0))
                    }
                    players_data.append(player_data)
                
                team_data = {
                    "team": team_name,
                    "teamCode": team_name[:3].upper(),
                    "defenseRank": len(defensive_data) + 1,  # Sequential rank
                    "defenseStrength": float(team_row['total_points_sum']) / 100,  # Scaled
                    "difficulty": "easy" if team_row['total_points_sum'] > 300 else "moderate" if team_row['total_points_sum'] > 250 else "hard",
                    "players": players_data[:3]  # Top 3 players
                }
                defensive_data.append(team_data)
        
        return {"defensivePicks": defensive_data}
    except Exception as e:
        print(f"Error exporting defensive picks: {e}")
        return {"defensivePicks": []}

def export_top_performers():
    """Export top performers data"""
    try:
        if 'df' not in globals():
            return {"topPerformers": []}
        
        # Get top performers by different metrics
        top_scorers = df.nlargest(10, 'G')
        top_assisters = df.nlargest(10, 'A') 
        top_points = df.nlargest(10, 'total_points')
        
        performers_data = {
            "topScorers": [{
                "name": row['web_name'],
                "team": row['team_name'],
                "position": "MID" if row['position_name'] == 'Midfielder' else 
                          "FWD" if row['position_name'] == 'Forward' else
                          "DEF" if row['position_name'] == 'Defender' else "GK",
                "goals": int(row['G']),
                "price": float(row['now_cost']) / 10,
                "ownership": float(row['selected_by_percent'])
            } for _, row in top_scorers.iterrows()],
            
            "topAssisters": [{
                "name": row['web_name'],
                "team": row['team_name'],
                "position": "MID" if row['position_name'] == 'Midfielder' else 
                          "FWD" if row['position_name'] == 'Forward' else
                          "DEF" if row['position_name'] == 'Defender' else "GK",
                "assists": int(row['A']),
                "price": float(row['now_cost']) / 10,
                "ownership": float(row['selected_by_percent'])
            } for _, row in top_assisters.iterrows()],
            
            "topPoints": [{
                "name": row['web_name'],
                "team": row['team_name'],
                "position": "MID" if row['position_name'] == 'Midfielder' else 
                          "FWD" if row['position_name'] == 'Forward' else
                          "DEF" if row['position_name'] == 'Defender' else "GK",
                "points": int(row['total_points']),
                "price": float(row['now_cost']) / 10,
                "ownership": float(row['selected_by_percent']),
                "form": float(row['total_points']) / 10  # Simplified form calculation
            } for _, row in top_points.iterrows()]
        }
        
        return performers_data
    except Exception as e:
        print(f"Error exporting top performers: {e}")
        return {"topPerformers": []}

In [5]:
# Flask API Routes
@app.route('/api/team-rankings', methods=['GET'])
def get_team_rankings():
    """API endpoint for team rankings"""
    data = export_team_rankings()
    return jsonify(data)

@app.route('/api/attacking-picks', methods=['GET'])
def get_attacking_picks():
    """API endpoint for attacking picks"""
    data = export_attacking_picks()
    return jsonify(data)

@app.route('/api/defensive-picks', methods=['GET'])
def get_defensive_picks():
    """API endpoint for defensive picks"""
    data = export_defensive_picks()
    return jsonify(data)

@app.route('/api/top-performers', methods=['GET'])
def get_top_performers():
    """API endpoint for top performers"""
    data = export_top_performers()
    return jsonify(data)

@app.route('/api/quick-picks', methods=['GET'])
def get_quick_picks():
    """API endpoint for quick picks (combines attacking and defensive)"""
    attacking = export_attacking_picks()
    defensive = export_defensive_picks()
    
    return jsonify({
        "attacking": attacking.get("attackingPicks", []),
        "defensive": defensive.get("defensivePicks", [])
    })

@app.route('/api/health', methods=['GET'])
def health_check():
    """Health check endpoint"""
    return jsonify({"status": "healthy", "message": "FPL API is running"})

# Function to start the Flask server
def start_api_server(port=5000, debug=True):
    """Start the Flask API server"""
    print(f"Starting FPL API server on port {port}...")
    print("Available endpoints:")
    print(f"  - http://localhost:{port}/api/health")
    print(f"  - http://localhost:{port}/api/team-rankings")
    print(f"  - http://localhost:{port}/api/attacking-picks")
    print(f"  - http://localhost:{port}/api/defensive-picks")
    print(f"  - http://localhost:{port}/api/top-performers")
    print(f"  - http://localhost:{port}/api/quick-picks")
    
    app.run(host='0.0.0.0', port=port, debug=debug)

In [11]:
# Test the export functions
print("Testing data export functions...")

# Test team rankings export
print("\n=== Team Rankings Export ===")
team_data = export_team_rankings()
print(f"Exported {len(team_data.get('teams', []))} teams")
if team_data.get('teams'):
    print("Sample team:", team_data['teams'][0])

# Test attacking picks export  
print("\n=== Attacking Picks Export ===")
attacking_data = export_attacking_picks()
print(f"Exported {len(attacking_data.get('attackingPicks', []))} attacking teams")
if attacking_data.get('attackingPicks'):
    print("Sample attacking team:", attacking_data['attackingPicks'][0])

# Test defensive picks export
print("\n=== Defensive Picks Export ===")
defensive_data = export_defensive_picks()
print(f"Exported {len(defensive_data.get('defensivePicks', []))} defensive teams")
if defensive_data.get('defensivePicks'):
    print("Sample defensive team:", defensive_data['defensivePicks'][0])

Testing data export functions...

=== Team Rankings Export ===
Exported 20 teams
Sample team: {'name': 'Arsenal', 'code': 'ARS', 'attackRank': 1, 'defenseRank': 1, 'goalsPerGame': np.float64(1.153846153846154), 'xGPerGame': 2.0, 'cleanSheetPct': 40, 'goalsConceded': 1.2, 'attackScore': 120.0, 'defenseScore': 35.5, 'overallRank': 1, 'totalPoints': 355, 'avgCost': 5.72, 'avgOwnership': 4.52}

=== Attacking Picks Export ===
Exported 5 attacking teams
Sample attacking team: {'team': 'Arsenal', 'teamCode': 'ARS', 'attackRank': 1, 'attackStrength': 12.0, 'difficulty': 'easy', 'players': [{'name': 'Zubimendi', 'position': 'MID', 'price': 0.55, 'goals_pg': 2.0, 'assists_pg': 0.0, 'points_pg': 4.0, 'ownership': 4.3, 'form': 1.6, 'total_points': 16}, {'name': 'Gyökeres', 'position': 'FWD', 'price': 0.9, 'goals_pg': 2.0, 'assists_pg': 0.0, 'points_pg': 6.0, 'ownership': 25.9, 'form': 1.2, 'total_points': 12}, {'name': 'Rice', 'position': 'MID', 'price': 0.65, 'goals_pg': 0.0, 'assists_pg': 1.0, '

In [8]:
# Debug: Check available columns
print("=== DEBUGGING DATA STRUCTURE ===")
print("\nDataFrame columns:")
print(df.columns.tolist())

print("\nteam_stats columns:")
if 'team_stats' in globals():
    print(team_stats.columns.tolist())
    print("\nteam_stats head:")
    print(team_stats.head())
else:
    print("team_stats not found")

print("\nUnique teams in df:")
print(df['team_name'].unique()[:10])  # First 10 teams

print("\nSample player data:")
print(df[['web_name', 'team_name', 'position_name', 'now_cost', 'total_points', 'selected_by_percent']].head())

=== DEBUGGING DATA STRUCTURE ===

DataFrame columns:
['id', 'element_type', 'web_name', 'team_name', 'opponent_team_name', 'was_home', 'now_cost', 'selected_by_percent', 'gameweek', 'minutes', 'shots', 'SoT', 'SiB', 'xG', 'npxG', 'G', 'npG', 'key_passes', 'xA', 'A', 'xGC', 'GC', 'xCS', 'CS', 'clearances_blocks_interceptions', 'recoveries', 'tackles', 'defensive_contribution', 'xGI', 'npxGI', 'xP', 'total_points', 'PvsxP', 'touches', 'penalty_area_touches', 'carries_final_third', 'carries_penalty_area', 'position_name', 'points_per_million']

team_stats columns:
['total_points_count', 'total_points_sum', 'total_points_mean', 'now_cost_mean', 'selected_by_percent_mean', 'G_sum', 'A_sum', 'minutes_sum']

team_stats head:
             total_points_count  total_points_sum  total_points_mean  \
team_name                                                              
Arsenal                     208               355               1.71   
Spurs                       222               333       

# 📊 Fantasy Premier League (FPL) - Complete Data Analysis & Strategy Tools

## 🎯 **Overview**
This notebook provides comprehensive analysis tools for Fantasy Premier League decision-making, including:
- **Data Exploration & Cleaning** - Understanding the dataset structure
- **Season Performance Analysis** - Player and team cumulative statistics  
- **Strategic Analysis Tools** - Fixture difficulty, player rankings, team strength
- **Actionable FPL Insights** - Real-world applications for transfers and team selection

## 📋 **Table of Contents**
1. [**Data Loading & Overview**](#data-loading--overview)
2. [**Data Cleaning & Processing**](#data-cleaning--processing)  
3. [**Exploratory Data Analysis**](#exploratory-data-analysis)
4. [**Season Statistics Calculation**](#season-statistics-calculation)
5. [**Player Performance Analysis**](#player-performance-analysis)
6. [**Strategic Analysis Tools**](#strategic-analysis-tools)
7. [**Fixture Analysis System**](#fixture-analysis-system)
8. [**Quick Reference & Usage Guide**](#quick-reference--usage-guide)

---

In [1]:
import pandas as pd 
df = pd.read_csv('fpl-data-stats.csv')
df.describe()

Unnamed: 0,id,element_type,now_cost,selected_by_percent,gameweek,minutes,shots,SoT,SiB,xG,...,defensive_contribution,xGI,npxGI,xP,total_points,PvsxP,touches,penalty_area_touches,carries_final_third,carries_penalty_area
count,4330.0,4330.0,4330.0,4330.0,4330.0,4330.0,4330.0,4330.0,4330.0,4330.0,...,4330.0,4330.0,4330.0,4330.0,4330.0,4330.0,1825.0,1825.0,4330.0,4330.0
mean,361.621709,2.545035,5.001409,2.063811,3.545727,27.252425,0.322864,0.102079,0.21963,0.034457,...,2.07067,0.059238,0.056328,1.243409,1.254965,0.011557,37.956164,1.492603,0.3097,0.124249
std,208.84697,0.834645,1.098394,6.042137,1.703166,37.832377,0.809484,0.368825,0.638731,0.127378,...,3.619726,0.171224,0.161028,2.042369,2.412934,1.419915,24.588661,1.929138,0.823496,0.529812
min,1.0,1.0,3.9,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-2.0,-3.0,-11.4,0.0,0.0,0.0,0.0
25%,181.0,2.0,4.4,0.1,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0
50%,361.0,3.0,4.8,0.2,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,36.0,1.0,0.0,0.0
75%,542.0,3.0,5.4,1.0,5.0,70.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,2.094,1.0,0.0,54.0,2.0,0.0,0.0
max,742.0,4.0,14.5,65.9,6.0,90.0,7.0,4.0,7.0,2.0,...,23.0,2.0,2.0,13.0,24.0,12.826,129.0,18.0,8.0,11.0


# 1️⃣ Data Loading & Overview {#data-loading--overview}

## 📂 Import Data and Initial Exploration
This section loads the FPL dataset and provides basic information about its structure.

In [94]:
# Dataset Overview and Structure
print("=== DATASET OVERVIEW ===")
print(f"Dataset Shape: {df.shape}")
print(f"Total Records: {df.shape[0]:,}")
print(f"Total Features: {df.shape[1]}")
print("\n=== COLUMN NAMES ===")
print(df.columns.tolist())

print("\n=== DATA TYPES ===")
print(df.dtypes)

print("\n=== BASIC INFO ===")
df.info()

=== DATASET OVERVIEW ===
Dataset Shape: (4330, 37)
Total Records: 4,330
Total Features: 37

=== COLUMN NAMES ===
['id', 'element_type', 'web_name', 'team_name', 'opponent_team_name', 'was_home', 'now_cost', 'selected_by_percent', 'gameweek', 'minutes', 'shots', 'SoT', 'SiB', 'xG', 'npxG', 'G', 'npG', 'key_passes', 'xA', 'A', 'xGC', 'GC', 'xCS', 'CS', 'clearances_blocks_interceptions', 'recoveries', 'tackles', 'defensive_contribution', 'xGI', 'npxGI', 'xP', 'total_points', 'PvsxP', 'touches', 'penalty_area_touches', 'carries_final_third', 'carries_penalty_area']

=== DATA TYPES ===
id                                   int64
element_type                         int64
web_name                            object
team_name                           object
opponent_team_name                  object
was_home                              bool
now_cost                           float64
selected_by_percent                float64
gameweek                             int64
minutes                  

In [95]:
# Missing Values Analysis
print("=== MISSING VALUES ANALYSIS ===")
missing_values = df.isnull().sum()
missing_percentage = (missing_values / len(df)) * 100

missing_df = pd.DataFrame({
    'Column': missing_values.index,
    'Missing Count': missing_values.values,
    'Missing Percentage': missing_percentage.values
}).sort_values('Missing Count', ascending=False)

# Display only columns with missing values
if missing_df['Missing Count'].sum() > 0:
    print(missing_df[missing_df['Missing Count'] > 0])
else:
    print("No missing values found in the dataset!")

print(f"\nTotal missing values in dataset: {missing_values.sum():,}")
print(f"Percentage of complete records: {((len(df) - missing_values.sum()) / len(df)) * 100:.2f}%")

df = df.drop(columns=['penalty_area_touches', 'touches'])

=== MISSING VALUES ANALYSIS ===
                  Column  Missing Count  Missing Percentage
34  penalty_area_touches           2505           57.852194
33               touches           2505           57.852194

Total missing values in dataset: 5,010
Percentage of complete records: -15.70%


# 2️⃣ Data Cleaning & Processing {#data-cleaning--processing}

## 🧹 Data Quality Assessment and Cleaning
Analyzing missing values, data types, and performing necessary data cleaning operations.

In [96]:
# Separate Numerical and Categorical Variables
import numpy as np

# Identify numerical and categorical columns
numerical_cols = df.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

print("=== VARIABLE TYPES ===")
print(f"Numerical variables ({len(numerical_cols)}): {numerical_cols}")
print(f"\nCategorical variables ({len(categorical_cols)}): {categorical_cols}")

# For categorical variables, show unique values
print("\n=== CATEGORICAL VARIABLES ANALYSIS ===")
for col in categorical_cols[:10]:  # Show first 10 categorical columns
    unique_count = df[col].nunique()
    print(f"\n{col}:")
    print(f"  - Unique values: {unique_count}")
    if unique_count <= 20:  # Show values if not too many
        print(f"  - Values: {sorted(df[col].unique())}")
    else:
        print(f"  - Top 10 values: {df[col].value_counts().head(10).index.tolist()}")

=== VARIABLE TYPES ===
Numerical variables (31): ['id', 'element_type', 'now_cost', 'selected_by_percent', 'gameweek', 'minutes', 'shots', 'SoT', 'SiB', 'xG', 'npxG', 'G', 'npG', 'key_passes', 'xA', 'A', 'xGC', 'GC', 'xCS', 'CS', 'clearances_blocks_interceptions', 'recoveries', 'tackles', 'defensive_contribution', 'xGI', 'npxGI', 'xP', 'total_points', 'PvsxP', 'carries_final_third', 'carries_penalty_area']

Categorical variables (3): ['web_name', 'team_name', 'opponent_team_name']

=== CATEGORICAL VARIABLES ANALYSIS ===

web_name:
  - Unique values: 721
  - Top 10 values: ['Patterson', 'Henderson', 'White', 'Wilson', 'Barnes', 'King', 'James', 'Gomez', 'Neto', "O'Brien"]

team_name:
  - Unique values: 20
  - Values: ['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton', 'Burnley', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Leeds', 'Liverpool', 'Man City', 'Man Utd', 'Newcastle', "Nott'm Forest", 'Spurs', 'Sunderland', 'West Ham', 'Wolves']

opponent_team_name:
  - U

In [97]:
# Filter useful numerical variables for FPL analysis
print("=== FILTERING USEFUL NUMERICAL VARIABLES ===")

# Define categories of useful variables
core_performance = ['total_points', 'minutes', 'now_cost', 'selected_by_percent']
attacking_metrics = ['G', 'A', 'xG', 'xA', 'shots', 'SoT', 'key_passes']
expected_metrics = ['xG', 'xA', 'xGI', 'npxG', 'npxGI', 'xP']
defensive_metrics = ['CS', 'xCS', 'GC', 'xGC', 'tackles', 'recoveries', 
                    'clearances_blocks_interceptions', 'defensive_contribution']
advanced_metrics = ['PvsxP', 'carries_final_third', 'carries_penalty_area']

# Combine into useful variables list
useful_numerical_vars = list(set(core_performance + attacking_metrics + 
                                expected_metrics + defensive_metrics + advanced_metrics))

# Filter only variables that exist in the dataset
useful_vars_available = [var for var in useful_numerical_vars if var in numerical_cols]

print(f"Original numerical variables: {len(numerical_cols)}")
print(f"Useful numerical variables: {len(useful_vars_available)}")
print(f"Variables removed: {len(numerical_cols) - len(useful_vars_available)}")

print(f"\n=== USEFUL VARIABLES BY CATEGORY ===")
print(f"Core Performance: {[v for v in core_performance if v in useful_vars_available]}")
print(f"Attacking Metrics: {[v for v in attacking_metrics if v in useful_vars_available]}")
print(f"Expected Stats: {[v for v in expected_metrics if v in useful_vars_available]}")
print(f"Defensive Metrics: {[v for v in defensive_metrics if v in useful_vars_available]}")
print(f"Advanced Metrics: {[v for v in advanced_metrics if v in useful_vars_available]}")

# Variables to exclude (less useful for FPL analysis)
excluded_vars = [var for var in numerical_cols if var not in useful_vars_available]
print(f"\n=== EXCLUDED VARIABLES ===")
print(f"Less useful for FPL: {excluded_vars}")

# Create filtered dataset with useful variables only
useful_numerical_df = df[useful_vars_available].copy()
print(f"\n=== FILTERED DATASET INFO ===")
print(f"Shape: {useful_numerical_df.shape}")
print(f"Useful numerical variables: {useful_vars_available}")

=== FILTERING USEFUL NUMERICAL VARIABLES ===
Original numerical variables: 31
Useful numerical variables: 26
Variables removed: 5

=== USEFUL VARIABLES BY CATEGORY ===
Core Performance: ['total_points', 'minutes', 'now_cost', 'selected_by_percent']
Attacking Metrics: ['G', 'A', 'xG', 'xA', 'shots', 'SoT', 'key_passes']
Expected Stats: ['xG', 'xA', 'xGI', 'npxG', 'npxGI', 'xP']
Defensive Metrics: ['CS', 'xCS', 'GC', 'xGC', 'tackles', 'recoveries', 'clearances_blocks_interceptions', 'defensive_contribution']
Advanced Metrics: ['PvsxP', 'carries_final_third', 'carries_penalty_area']

=== EXCLUDED VARIABLES ===
Less useful for FPL: ['id', 'element_type', 'gameweek', 'SiB', 'npG']

=== FILTERED DATASET INFO ===
Shape: (4330, 26)
Useful numerical variables: ['PvsxP', 'now_cost', 'carries_final_third', 'clearances_blocks_interceptions', 'minutes', 'GC', 'key_passes', 'xCS', 'xGI', 'SoT', 'npxG', 'total_points', 'selected_by_percent', 'xP', 'xGC', 'A', 'xG', 'G', 'recoveries', 'CS', 'defensive

In [98]:
# Display the first 20 rows of the dataset
print("=== TOP 20 ROWS OF DATASET ===")
print(df.head(20))



=== TOP 20 ROWS OF DATASET ===
    id  element_type      web_name team_name opponent_team_name  was_home  \
0    1             1          Raya   Arsenal            Man Utd     False   
1    2             1  Arrizabalaga   Arsenal            Man Utd     False   
2    3             1          Hein   Arsenal            Man Utd     False   
3    4             1       Setford   Arsenal            Man Utd     False   
4    5             2       Gabriel   Arsenal            Man Utd     False   
5    6             2        Saliba   Arsenal            Man Utd     False   
6    7             2     Calafiori   Arsenal            Man Utd     False   
7    8             2      J.Timber   Arsenal            Man Utd     False   
8    9             2        Kiwior   Arsenal            Man Utd     False   
9   10             2  Lewis-Skelly   Arsenal            Man Utd     False   
10  11             2         White   Arsenal            Man Utd     False   
11  12             2     Zinchenko   Arsenal 

In [99]:
# Outlier Detection and Analysis
print("=== OUTLIER DETECTION ===")

def detect_outliers_iqr(df, column):
    """Detect outliers using IQR method"""
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    
    outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
    return outliers, lower_bound, upper_bound

# Analyze outliers for key metrics
key_metrics = ['total_points', 'now_cost', 'selected_by_percent', 'minutes']

for metric in key_metrics:
    if metric in df.columns and df[metric].notna().sum() > 0:
        outliers, lower, upper = detect_outliers_iqr(df, metric)
        print(f"\n{metric.upper()}:")
        print(f"  Normal range: {lower:.2f} to {upper:.2f}")
        print(f"  Number of outliers: {len(outliers)}")
        print(f"  Percentage of outliers: {(len(outliers) / len(df)) * 100:.2f}%")
        
        if len(outliers) > 0 and len(outliers) <= 10:
            print("  Top outliers:")
            top_outliers = outliers.nlargest(10, metric)[['web_name', 'team_name', metric]]
            for _, player in top_outliers.iterrows():
                print(f"    {player['web_name']} ({player['team_name']}): {player[metric]}")

# Performance vs Expected Analysis
print("\n\n=== PERFORMANCE vs EXPECTED ANALYSIS ===")

# Players overperforming xG
if 'xG' in df.columns and 'G' in df.columns:
    df['goal_overperformance'] = df['G'] - df['xG']
    top_goal_overperformers = df[df['goal_overperformance'] > 0].nlargest(10, 'goal_overperformance')
    print("\nTop Goal Overperformers:")
    for _, player in top_goal_overperformers.iterrows():
        print(f"  {player['web_name']} ({player['team_name']}): {player['G']:.1f} goals vs {player['xG']:.2f} xG (+{player['goal_overperformance']:.2f})")

# Players overperforming xA
if 'xA' in df.columns and 'A' in df.columns:
    df['assist_overperformance'] = df['A'] - df['xA']
    top_assist_overperformers = df[df['assist_overperformance'] > 0].nlargest(10, 'assist_overperformance')
    print("\nTop Assist Overperformers:")
    for _, player in top_assist_overperformers.iterrows():
        print(f"  {player['web_name']} ({player['team_name']}): {player['A']:.1f} assists vs {player['xA']:.2f} xA (+{player['assist_overperformance']:.2f})")

=== OUTLIER DETECTION ===

TOTAL_POINTS:
  Normal range: -1.50 to 2.50
  Number of outliers: 683
  Percentage of outliers: 15.77%

NOW_COST:
  Normal range: 2.90 to 6.90
  Number of outliers: 228
  Percentage of outliers: 5.27%

SELECTED_BY_PERCENT:
  Normal range: -1.25 to 2.35
  Number of outliers: 699
  Percentage of outliers: 16.14%

MINUTES:
  Normal range: -105.00 to 175.00
  Number of outliers: 0
  Percentage of outliers: 0.00%


=== PERFORMANCE vs EXPECTED ANALYSIS ===

Top Goal Overperformers:
  Zubimendi (Arsenal): 2.0 goals vs 0.20 xG (+1.80)
  Thiago (Brentford): 2.0 goals vs 0.60 xG (+1.40)
  Richarlison (Spurs): 2.0 goals vs 0.70 xG (+1.30)
  J.Timber (Arsenal): 2.0 goals vs 0.70 xG (+1.30)
  Welbeck (Brighton): 2.0 goals vs 0.80 xG (+1.20)
  Semenyo (Bournemouth): 2.0 goals vs 0.90 xG (+1.10)
  Wood (Nott'm Forest): 2.0 goals vs 1.00 xG (+1.00)
  Isidor (Sunderland): 1.0 goals vs 0.00 xG (+1.00)
  Garner (Everton): 1.0 goals vs 0.00 xG (+1.00)
  Gravenberch (Liverpool): 

# 3️⃣ Exploratory Data Analysis {#exploratory-data-analysis}

## 🔍 Deep Dive into Data Patterns
Exploring data distributions, outliers, and relationships between variables.

In [2]:
# Positional and Team Analysis
print("=== POSITIONAL ANALYSIS ===")

# Position mapping
position_map = {1: 'Goalkeeper', 2: 'Defender', 3: 'Midfielder', 4: 'Forward'}
df['position_name'] = df['element_type'].map(position_map)

# Analysis by position
position_stats = df.groupby('position_name').agg({
    'total_points': ['count', 'mean', 'median', 'max'],
    'now_cost': ['mean', 'median'],
    'minutes': ['mean'],
    'selected_by_percent': ['mean'],
    'G': ['mean'],
    'A': ['mean']
}).round(2)

print("Position Statistics:")
print(position_stats)

print("\n=== TEAM ANALYSIS ===")

# Team performance analysis
team_stats = df.groupby('team_name').agg({
    'total_points': ['count', 'sum', 'mean'],
    'now_cost': ['mean'],
    'selected_by_percent': ['mean'],
    'G': ['sum'],
    'A': ['sum'],
    'minutes': ['sum']
}).round(2)

team_stats.columns = ['_'.join(col) for col in team_stats.columns]
team_stats = team_stats.sort_values('total_points_sum', ascending=False)

print("\nTop 10 Teams by Total Points:")
print(team_stats.head(10)[['total_points_sum', 'total_points_mean', 'now_cost_mean']])

print("\n=== VALUE ANALYSIS BY POSITION ===")
# Calculate points per million by position
df['points_per_million'] = df['total_points'] / df['now_cost']

value_by_position = df[df['total_points'] > 0].groupby('position_name')['points_per_million'].agg([
    'count', 'mean', 'median', 'max'
]).round(2)

print(value_by_position)

=== POSITIONAL ANALYSIS ===
Position Statistics:
              total_points                  now_cost        minutes  \
                     count  mean median max     mean median    mean   
position_name                                                         
Defender              1432  1.36    0.0  24     4.49    4.4   31.17   
Forward                468  1.30    0.0  16     5.80    5.5   22.37   
Goalkeeper             503  0.83    0.0  15     4.32    4.0   21.47   
Midfielder            1927  1.28    0.0  16     5.37    5.0   27.04   

              selected_by_percent     G     A  
                             mean  mean  mean  
position_name                                  
Defender                     2.10  0.01  0.02  
Forward                      3.81  0.11  0.02  
Goalkeeper                   2.33  0.00  0.00  
Midfielder                   1.55  0.04  0.04  

=== TEAM ANALYSIS ===

Top 10 Teams by Total Points:
                total_points_sum  total_points_mean  now_cost_m

# 5️⃣ Player Performance Analysis {#player-performance-analysis}

## 🏆 Feature 1 Season Leaders, Value Picks & Hidden Gems
Analysis of top performers using **cumulative season statistics** (not single gameweek data).

In [101]:
# Calculate cumulative season statistics for each player
print("=== CALCULATING CUMULATIVE SEASON STATISTICS ===")

# Group by player and calculate season totals
season_stats = df.groupby(['web_name', 'team_name', 'element_type', 'now_cost', 'selected_by_percent']).agg({
    'total_points': 'sum',  # Sum of all gameweek points
    'minutes': 'sum',       # Total minutes played
    'G': 'sum',            # Total goals
    'A': 'sum',            # Total assists  
    'xG': 'sum',           # Total expected goals
    'xA': 'sum',           # Total expected assists
    'shots': 'sum',        # Total shots
    'SoT': 'sum',          # Total shots on target
    'key_passes': 'sum',   # Total key passes
    'CS': 'sum',           # Total clean sheets
    'xCS': 'sum',          # Total expected clean sheets
    'GC': 'sum',           # Total goals conceded
    'xGC': 'sum',          # Total expected goals conceded
    'gameweek': ['count', 'max'],  # Games played and latest gameweek
    'SiB': 'sum',          # Total shots in box
    'tackles': 'sum',      # Total tackles
    'recoveries': 'sum'    # Total recoveries
}).round(2)

print("Columns after aggregation:")
print(season_stats.columns.tolist())

# Flatten column names
season_stats.columns = ['_'.join(col) if col[1] else col[0] for col in season_stats.columns]
season_stats = season_stats.rename(columns={
    'gameweek_count': 'games_played',
    'gameweek_max': 'last_gameweek'
})

print("Columns after flattening:")
print(season_stats.columns.tolist())

# Reset index to make it a regular dataframe
season_stats = season_stats.reset_index()

# Add position names
position_map = {1: 'Goalkeeper', 2: 'Defender', 3: 'Midfielder', 4: 'Forward'}
season_stats['position_name'] = season_stats['element_type'].map(position_map)

# Calculate additional metrics using the correct column names
season_stats['points_per_million'] = season_stats['total_points_sum'] / season_stats['now_cost']
season_stats['points_per_game'] = season_stats['total_points_sum'] / season_stats['games_played']
season_stats['minutes_per_game'] = season_stats['minutes_sum'] / season_stats['games_played']
season_stats['goals_per_game'] = season_stats['G_sum'] / season_stats['games_played']
season_stats['assists_per_game'] = season_stats['A_sum'] / season_stats['games_played']

# Rename main columns for clarity
season_stats = season_stats.rename(columns={
    'total_points_sum': 'season_points',
    'minutes_sum': 'season_minutes',
    'G_sum': 'season_goals',
    'A_sum': 'season_assists',
    'xG_sum': 'season_xG',
    'xA_sum': 'season_xA',
    'shots_sum': 'season_shots',
    'SoT_sum': 'season_SoT',
    'key_passes_sum': 'season_key_passes',
    'CS_sum': 'season_CS',
    'xCS_sum': 'season_xCS',
    'GC_sum': 'season_GC',
    'xGC_sum': 'season_xGC',
    'SiB_sum': 'season_SiB',
    'tackles_sum': 'season_tackles',
    'recoveries_sum': 'season_recoveries'
})

# Round all numeric columns
numeric_cols = season_stats.select_dtypes(include=[np.number]).columns
season_stats[numeric_cols] = season_stats[numeric_cols].round(2)

print(f"Created season stats for {len(season_stats)} players")
print(f"Data covers gameweeks 1-{df['gameweek'].max()}")
print("\nSample of season stats:")
print(season_stats[['web_name', 'team_name', 'position_name', 'games_played', 'season_points', 'season_goals', 'season_assists', 'season_minutes']].head().to_string(index=False))

=== CALCULATING CUMULATIVE SEASON STATISTICS ===
Columns after aggregation:
[('total_points', 'sum'), ('minutes', 'sum'), ('G', 'sum'), ('A', 'sum'), ('xG', 'sum'), ('xA', 'sum'), ('shots', 'sum'), ('SoT', 'sum'), ('key_passes', 'sum'), ('CS', 'sum'), ('xCS', 'sum'), ('GC', 'sum'), ('xGC', 'sum'), ('gameweek', 'count'), ('gameweek', 'max'), ('SiB', 'sum'), ('tackles', 'sum'), ('recoveries', 'sum')]
Columns after flattening:
['total_points_sum', 'minutes_sum', 'G_sum', 'A_sum', 'xG_sum', 'xA_sum', 'shots_sum', 'SoT_sum', 'key_passes_sum', 'CS_sum', 'xCS_sum', 'GC_sum', 'xGC_sum', 'games_played', 'last_gameweek', 'SiB_sum', 'tackles_sum', 'recoveries_sum']
Created season stats for 758 players
Data covers gameweeks 1-6

Sample of season stats:
 web_name   team_name position_name  games_played  season_points  season_goals  season_assists  season_minutes
 A.Becker   Liverpool    Goalkeeper             6             20           0.0             0.0             540
 A.García Aston Villa      

In [114]:
# STREAMLINED: Key FPL Insights - Non-Overlapping Essential Data
print("🏆 === FPL KEY INSIGHTS & RECOMMENDATIONS ===")

# 1. TOP POINT SCORERS (Season Total) - Most reliable performers
print("\n⭐ TOP 10 SEASON PERFORMERS")
print("-" * 50)
top_scorers = season_stats.nlargest(10, 'season_points')
for i, (_, player) in enumerate(top_scorers.iterrows(), 1):
    ppg = player['season_points'] / player['games_played'] if player['games_played'] > 0 else 0
    print(f"{i:2d}. {player['web_name']} ({player['position_name']}, {player['team_name']})")
    print(f"    {player['season_points']:.0f} pts ({ppg:.1f} ppg) | £{player['now_cost']}m | {player['selected_by_percent']}% owned")

# 2. BEST VALUE PICKS - Points per million (exclude top performers to avoid overlap)
print(f"\n💰 BEST VALUE PLAYERS (£/Points Efficiency)")
print("-" * 50)
# Exclude players already in top 10 scorers to avoid overlap
top_scorer_names = set(top_scorers['web_name'])
value_candidates = season_stats[
    (season_stats['season_points'] >= 15) & 
    (season_stats['points_per_million'] > 0) &
    (~season_stats['web_name'].isin(top_scorer_names))  # Exclude top scorers
]
value_players = value_candidates.nlargest(8, 'points_per_million')
for i, (_, player) in enumerate(value_players.iterrows(), 1):
    print(f"{i}. {player['web_name']} ({player['position_name']}, {player['team_name']})")
    print(f"   {player['points_per_million']:.2f} pts/£m | {player['season_points']:.0f} pts | £{player['now_cost']}m")

# 3. HIDDEN GEMS - Low ownership with strong underlying stats
print(f"\n💎 HIDDEN GEMS (Low Ownership + Strong Potential)")
print("-" * 50)
hidden_gems = season_stats[
    (season_stats['season_points'] >= 25) & 
    (season_stats['selected_by_percent'] < 5) & 
    (season_stats['selected_by_percent'] > 0) &
    (season_stats['games_played'] >= 3) &
    (~season_stats['web_name'].isin(top_scorer_names))  # Exclude top performers
]

if len(hidden_gems) > 0:
    # Calculate underlying performance score
    hidden_gems = hidden_gems.copy()
    hidden_gems['potential_score'] = (
        hidden_gems['season_xG'] * 0.4 + 
        hidden_gems['season_xA'] * 0.3 + 
        hidden_gems['season_xCS'] * 0.2 + 
        hidden_gems['season_key_passes'] * 0.1
    )
    
    hidden_gems_sorted = hidden_gems.nlargest(6, 'potential_score')
    for i, (_, player) in enumerate(hidden_gems_sorted.iterrows(), 1):
        print(f"{i}. {player['web_name']} ({player['position_name']}, {player['team_name']})")
        print(f"   {player['season_points']:.0f} pts | {player['selected_by_percent']}% owned | £{player['now_cost']}m")
        print(f"   Potential: xG:{player['season_xG']:.1f}, xA:{player['season_xA']:.1f}, xCS:{player['season_xCS']:.1f}")
else:
    print("No hidden gems found with current criteria")

# 4. ATTACKING & DEFENSIVE LEADERS (Complete picture)
print(f"\n⚽ ATTACKING LEADERS")
print("-" * 50)
goal_leaders = season_stats[season_stats['season_goals'] > 0].nlargest(5, 'season_goals')
print("🥅 Top Goal Scorers:")
for i, (_, player) in enumerate(goal_leaders.iterrows(), 1):
    gpg = player['season_goals'] / player['games_played']
    print(f"  {i}. {player['web_name']} ({player['team_name']}): {player['season_goals']:.0f} goals ({gpg:.2f}/game)")

assist_leaders = season_stats[season_stats['season_assists'] > 0].nlargest(5, 'season_assists')
print("\n🎯 Top Assist Providers:")
for i, (_, player) in enumerate(assist_leaders.iterrows(), 1):
    apg = player['season_assists'] / player['games_played']
    print(f"  {i}. {player['web_name']} ({player['team_name']}): {player['season_assists']:.0f} assists ({apg:.2f}/game)")

# 5. COMPREHENSIVE DEFENSIVE LEADERS (Properly calculated)
print(f"\n🛡️ DEFENSIVE LEADERS")
print("-" * 50)

# Filter for actual defensive players (min games played + points to exclude bench warmers)
defensive_candidates = season_stats[
    (season_stats['season_points'] >= 10) &  # Must have at least 10 points (actual playing time)
    (season_stats['games_played'] >= 3) &    # Must have played at least 3 games
    (season_stats['position_name'].isin(['Goalkeeper', 'Defender']))  # Only GK/DEF
].copy()

if len(defensive_candidates) > 0:
    # Calculate comprehensive defensive score incorporating multiple metrics
    defensive_candidates['defensive_score'] = (
        defensive_candidates['season_CS'] * 3.0 +  # Clean sheets (most important)
        defensive_candidates['season_tackles'] * 0.5 +  # Tackles
        defensive_candidates['season_recoveries'] * 0.2 +  # Recoveries
        defensive_candidates['season_xCS'] * 1.5 +  # Expected clean sheets (sustainability)
        (defensive_candidates['season_points'] / defensive_candidates['games_played']) * 1.0  # PPG for overall value
    )
    
    # Top overall defensive performers
    top_defenders = defensive_candidates.nlargest(8, 'defensive_score')
    print("🛡️ Best Defensive Performers (Comprehensive Scoring):")
    for i, (_, player) in enumerate(top_defenders.iterrows(), 1):
        cs_rate = (player['season_CS'] / player['games_played']) * 100 if player['games_played'] > 0 else 0
        ppg = player['season_points'] / player['games_played'] if player['games_played'] > 0 else 0
        print(f"  {i}. {player['web_name']} ({player['team_name']}, {player['position_name']})")
        print(f"     {player['season_points']:.0f} pts ({ppg:.1f} ppg) | {player['season_CS']:.0f} CS ({cs_rate:.1f}%) | {player['season_tackles']:.0f} tackles | £{player['now_cost']}m")
    
    # Best Clean Sheet specialists (with meaningful playing time)
    cs_specialists = defensive_candidates[defensive_candidates['season_CS'] > 0].nlargest(5, 'season_CS')
    print(f"\n🥅 Clean Sheet Specialists:")
    for i, (_, player) in enumerate(cs_specialists.iterrows(), 1):
        cs_rate = (player['season_CS'] / player['games_played']) * 100 if player['games_played'] > 0 else 0
        print(f"  {i}. {player['web_name']} ({player['team_name']}): {player['season_CS']:.0f} CS ({cs_rate:.1f}% rate), {player['season_points']:.0f} pts")
    
    # Top Goalkeepers only (with playing time)
    gk_candidates = defensive_candidates[
        (defensive_candidates['position_name'] == 'Goalkeeper') & 
        (defensive_candidates['season_points'] >= 15)  # Higher threshold for GKs
    ]
    if len(gk_candidates) > 0:
        gk_leaders = gk_candidates.nlargest(5, 'season_points')
        print(f"\n🧤 Top Goalkeepers (By Points):")
        for i, (_, player) in enumerate(gk_leaders.iterrows(), 1):
            cs_rate = (player['season_CS'] / player['games_played']) * 100 if player['games_played'] > 0 else 0
            ppg = player['season_points'] / player['games_played'] if player['games_played'] > 0 else 0
            print(f"  {i}. {player['web_name']} ({player['team_name']}): {player['season_points']:.0f} pts ({ppg:.1f} ppg), {player['season_CS']:.0f} CS ({cs_rate:.1f}%)")
    else:
        print(f"\n🧤 No goalkeepers found with sufficient playing time")
        
    # Best Defenders by tackles + defensive contribution
    active_defenders = defensive_candidates[
        (defensive_candidates['position_name'] == 'Defender') & 
        (defensive_candidates['season_tackles'] > 0)
    ]
    if len(active_defenders) > 0:
        tackle_leaders = active_defenders.nlargest(5, 'season_tackles')
        print(f"\n⚔️ Most Active Defenders (Tackles):")
        for i, (_, player) in enumerate(tackle_leaders.iterrows(), 1):
            tackles_per_game = player['season_tackles'] / player['games_played'] if player['games_played'] > 0 else 0
            print(f"  {i}. {player['web_name']} ({player['team_name']}): {player['season_tackles']:.0f} tackles ({tackles_per_game:.1f}/game), {player['season_points']:.0f} pts")
    
else:
    print("❌ No defensive players found with sufficient playing time")

# 6. OVERPERFORMANCE ANALYSIS - Sustainability insights
print(f"\n📈 OVERPERFORMANCE ANALYSIS (Sustainability Check)")
print("-" * 50)

# Goal overperformers - using season stats
if 'season_xG' in season_stats.columns and 'season_goals' in season_stats.columns:
    # Calculate overperformance
    overperformance_candidates = season_stats[
        (season_stats['season_goals'] > 0) & 
        (season_stats['season_xG'] > 0) &
        (season_stats['games_played'] >= 3)
    ].copy()
    
    overperformance_candidates['goal_overperformance'] = overperformance_candidates['season_goals'] - overperformance_candidates['season_xG']
    
    # Top goal overperformers (might regress)
    goal_overperformers = overperformance_candidates[overperformance_candidates['goal_overperformance'] > 0.5].nlargest(5, 'goal_overperformance')
    print("⚡ Top Goal Overperformers (Potential Regression Risk):")
    for i, (_, player) in enumerate(goal_overperformers.iterrows(), 1):
        print(f"  {i}. {player['web_name']} ({player['team_name']}): {player['season_goals']:.0f} goals vs {player['season_xG']:.1f} xG (+{player['goal_overperformance']:.1f})")
    
    # Sustainable goal scorers (goals close to xG)
    sustainable_scorers = overperformance_candidates[
        (abs(overperformance_candidates['goal_overperformance']) <= 1.0) &
        (overperformance_candidates['season_goals'] >= 3)
    ].nlargest(5, 'season_goals')
    print(f"\n✅ Sustainable Goal Scorers (Goals ≈ xG):")
    for i, (_, player) in enumerate(sustainable_scorers.iterrows(), 1):
        print(f"  {i}. {player['web_name']} ({player['team_name']}): {player['season_goals']:.0f} goals vs {player['season_xG']:.1f} xG ({player['goal_overperformance']:.1f})")

# Assist overperformers
if 'season_xA' in season_stats.columns and 'season_assists' in season_stats.columns:
    assist_candidates = season_stats[
        (season_stats['season_assists'] > 0) & 
        (season_stats['season_xA'] > 0) &
        (season_stats['games_played'] >= 3)
    ].copy()
    
    assist_candidates['assist_overperformance'] = assist_candidates['season_assists'] - assist_candidates['season_xA']
    
    # Top assist overperformers
    assist_overperformers = assist_candidates[assist_candidates['assist_overperformance'] > 0.3].nlargest(5, 'assist_overperformance')
    print(f"\n🎯 Top Assist Overperformers (Potential Regression Risk):")
    for i, (_, player) in enumerate(assist_overperformers.iterrows(), 1):
        print(f"  {i}. {player['web_name']} ({player['team_name']}): {player['season_assists']:.0f} assists vs {player['season_xA']:.1f} xA (+{player['assist_overperformance']:.1f})")

print(f"\n💡 INTERPRETATION:")
print("• Overperformers may see point drops as they regress to expected stats")
print("• Sustainable performers are reliable long-term picks")
print("• Consider selling overperformers at peak value")

🏆 === FPL KEY INSIGHTS & RECOMMENDATIONS ===

⭐ TOP 10 SEASON PERFORMERS
--------------------------------------------------
 1. Haaland (Forward, Man City)
    62 pts (10.3 ppg) | £14.4m | 53.6% owned
 2. Semenyo (Midfielder, Bournemouth)
    48 pts (8.0 ppg) | £7.8m | 53.4% owned
 3. Senesi (Defender, Bournemouth)
    44 pts (7.3 ppg) | £4.9m | 20.6% owned
 4. Guéhi (Defender, Crystal Palace)
    43 pts (7.2 ppg) | £4.8m | 27.9% owned
 5. Anthony (Midfielder, Burnley)
    40 pts (6.7 ppg) | £5.6m | 4.6% owned
 6. Alderete (Defender, Sunderland)
    39 pts (6.5 ppg) | £4.0m | 4.2% owned
 7. Enzo (Midfielder, Chelsea)
    39 pts (6.5 ppg) | £6.7m | 13.5% owned
 8. Roefs (Goalkeeper, Sunderland)
    39 pts (6.5 ppg) | £4.5m | 3.4% owned
 9. Gabriel (Defender, Arsenal)
    38 pts (6.3 ppg) | £6.2m | 25.4% owned
10. J.Timber (Defender, Arsenal)
    37 pts (6.2 ppg) | £5.8m | 14.5% owned

💰 BEST VALUE PLAYERS (£/Points Efficiency)
--------------------------------------------------
1. Livram

# 6️⃣ Strategic Analysis Tools {#strategic-analysis-tools}

## ⚔️ Advanced FPL Analysis Functions

This section contains powerful, reusable functions for Fantasy Premier League strategic analysis:

### 🔧 **Available Tools:**
1. **Defender Rankings** - Rank defenders by clean sheet potential and value
2. **Attacker Rankings** - Rank attacking players by goal/assist potential  
3. **Team Strength Analysis** - Calculate attacking and defensive strength for all teams
4. **Fixture Difficulty Calculator** - Score any specific matchup

### 📊 **Key Features:**
- Uses **cumulative season statistics** for accuracy
- Considers expected stats (xG, xA, xCS) for sustainability  
- Includes value scoring (points per £million)
- Accounts for consistency and minutes played
- Easily customizable parameters

In [103]:
import pandas as pd
import numpy as np
from typing import Optional, List

def calculate_team_stats_corrected(season_data: pd.DataFrame) -> tuple:
    """
    Calculate attacking and defensive statistics for each team using cumulative season data.
    
    Args:
        season_data: DataFrame with cumulative season statistics per player
        
    Returns:
        tuple: (attacking_stats, defensive_stats) DataFrames
    """
    # Attacking stats by team (aggregate all players from each team)
    attacking_stats = season_data.groupby('team_name').agg({
        'season_xG': 'sum',      # Total team xG
        'season_goals': 'sum',   # Total team goals
        'season_shots': 'sum',   # Total team shots
        'season_SoT': 'sum',     # Total team shots on target
        'season_minutes': 'sum', # Total team minutes
        'games_played': 'mean'   # Average games played (should be similar for all players)
    }).round(3)
    
    # Convert totals to per-game averages
    attacking_stats['avg_xG_for'] = attacking_stats['season_xG'] / attacking_stats['games_played']
    attacking_stats['avg_G_for'] = attacking_stats['season_goals'] / attacking_stats['games_played']
    attacking_stats['avg_shots_for'] = attacking_stats['season_shots'] / attacking_stats['games_played']
    attacking_stats['avg_SoT_for'] = attacking_stats['season_SoT'] / attacking_stats['games_played']
    
    # For defensive stats, we need to use the original gameweek data to get opponent information
    # Since we don't have opponent data in season_stats, we'll use a simplified approach
    # based on goals conceded for defensive teams (GK + DEF)
    defensive_players = season_data[season_data['element_type'].isin([1, 2])]  # GK and DEF
    
    defensive_stats = defensive_players.groupby('team_name').agg({
        'season_GC': 'mean',     # Average goals conceded per defensive player
        'season_xGC': 'mean',    # Average xGC per defensive player  
        'games_played': 'mean'   # Average games played
    }).round(3)
    
    # Convert to per-game averages (rename for consistency)
    defensive_stats['avg_G_conceded'] = defensive_stats['season_GC'] / defensive_stats['games_played']
    defensive_stats['avg_xG_conceded'] = defensive_stats['season_xGC'] / defensive_stats['games_played']
    
    return attacking_stats, defensive_stats

def rank_fixtures_corrected(season_data: pd.DataFrame, upcoming_gameweeks: Optional[List[int]] = None) -> pd.DataFrame:
    """
    Analyze and rank fixtures based on attacking strength vs defensive weakness using season data.
    
    Args:
        season_data: DataFrame with cumulative season statistics
        upcoming_gameweeks: List of gameweek numbers to analyze (if None, uses next 3 GWs)
    
    Returns:
        DataFrame with ranked fixtures showing favorability scores
    """
    if upcoming_gameweeks is None:
        current_gw = season_data['last_gameweek'].max()
        upcoming_gameweeks = [current_gw + 1, current_gw + 2, current_gw + 3]
    
    # Get team statistics
    attacking_stats, defensive_stats = calculate_team_stats_corrected(season_data)
    
    # Create fixtures matrix
    teams = season_data['team_name'].unique()
    fixtures = []
    
    for gw in upcoming_gameweeks:
        for home_team in teams:
            for away_team in teams:
                if home_team != away_team:
                    fixtures.append({
                        'gameweek': gw,
                        'home_team': home_team,
                        'away_team': away_team,
                        'fixture': f"{home_team} vs {away_team}"
                    })
    
    fixture_df = pd.DataFrame(fixtures)
    
    # Add attacking stats for home team
    fixture_df = fixture_df.merge(
        attacking_stats[['avg_xG_for', 'avg_G_for', 'avg_shots_for', 'avg_SoT_for']], 
        left_on='home_team', 
        right_index=True, 
        how='left'
    )
    
    # Add defensive stats for away team
    fixture_df = fixture_df.merge(
        defensive_stats[['avg_xG_conceded', 'avg_G_conceded']], 
        left_on='away_team', 
        right_index=True, 
        how='left'
    )
    
    # Calculate favorability scores
    fixture_df['attacking_strength'] = (
        fixture_df['avg_xG_for'] * 0.4 + 
        fixture_df['avg_G_for'] * 0.3 + 
        fixture_df['avg_shots_for'] * 0.2 + 
        fixture_df['avg_SoT_for'] * 0.1
    )
    
    fixture_df['defensive_weakness'] = (
        fixture_df['avg_xG_conceded'] * 0.6 + 
        fixture_df['avg_G_conceded'] * 0.4
    )
    
    # Overall favorability score
    fixture_df['favorability_score'] = (
        fixture_df['attacking_strength'] * 0.6 + 
        fixture_df['defensive_weakness'] * 0.4
    )
    
    # Add difficulty rating
    fixture_df['difficulty_rating'] = pd.cut(
        fixture_df['favorability_score'], 
        bins=5, 
        labels=['Very Hard', 'Hard', 'Medium', 'Easy', 'Very Easy']
    )
    
    # Sort by favorability
    result = fixture_df.sort_values(['gameweek', 'favorability_score'], ascending=[True, False])
    
    output_cols = [
        'gameweek', 'fixture', 'home_team', 'away_team', 'favorability_score', 
        'difficulty_rating', 'attacking_strength', 'defensive_weakness',
        'avg_xG_for', 'avg_G_for', 'avg_xG_conceded', 'avg_G_conceded'
    ]
    
    return result[output_cols].round(3)

print("=== CORRECTED FIXTURE ANALYSIS FUNCTION CREATED ===")
print("Function: rank_fixtures_corrected(season_data, upcoming_gameweeks=None)")
print("Purpose: Identifies favorable fixtures using CUMULATIVE season statistics")
print("\nKey Changes:")
print("- Uses season_stats dataframe instead of gameweek data")
print("- Calculates team attacking/defensive strength from cumulative player stats")
print("- More accurate representation of team form over the season")

=== CORRECTED FIXTURE ANALYSIS FUNCTION CREATED ===
Function: rank_fixtures_corrected(season_data, upcoming_gameweeks=None)
Purpose: Identifies favorable fixtures using CUMULATIVE season statistics

Key Changes:
- Uses season_stats dataframe instead of gameweek data
- Calculates team attacking/defensive strength from cumulative player stats
- More accurate representation of team form over the season


In [104]:
def filter_defenders_corrected(season_data: pd.DataFrame, min_games: int = 3, top_n: int = 20) -> pd.DataFrame:
    """
    Rank defenders by clean sheet potential using cumulative season data.
    
    Args:
        season_data: DataFrame with cumulative season statistics
        min_games: Minimum games played to be considered
        top_n: Number of top defenders to return
    
    Returns:
        DataFrame with ranked defenders based on season performance
    """
    # Filter for defenders only
    defenders = season_data[season_data['element_type'] == 2].copy()
    
    # Filter by minimum games played
    defenders = defenders[defenders['games_played'] >= min_games]
    
    if len(defenders) == 0:
        return pd.DataFrame()
    
    # Calculate performance metrics
    defenders['clean_sheet_rate'] = (defenders['season_CS'] / defenders['games_played']).fillna(0)
    defenders['xCS_per_game'] = (defenders['season_xCS'] / defenders['games_played']).fillna(0)
    defenders['goals_conceded_per_game'] = (defenders['season_GC'] / defenders['games_played']).fillna(0)
    defenders['minutes_per_game'] = defenders['season_minutes'] / defenders['games_played']
    defenders['consistency_score'] = np.minimum(defenders['minutes_per_game'] / 90, 1)
    
    # Clean sheet potential score
    defenders['clean_sheet_potential'] = (
        defenders['xCS_per_game'] * 0.4 +
        defenders['clean_sheet_rate'] * 0.35 +
        (1 / (defenders['goals_conceded_per_game'] + 0.1)) * 0.15 +  # Lower goals conceded = better
        defenders['consistency_score'] * 0.1
    )
    
    # Value score
    defenders['value_score'] = defenders['season_points'] / defenders['now_cost']
    
    # Overall defender score  
    defenders['defender_score'] = (
        defenders['clean_sheet_potential'] * 0.6 +
        defenders['value_score'] * 0.25 +
        defenders['consistency_score'] * 0.15
    )
    
    # Sort by defender score
    result = defenders.sort_values('defender_score', ascending=False)
    
    # Select key columns
    output_cols = [
        'web_name', 'team_name', 'now_cost', 'selected_by_percent',
        'defender_score', 'clean_sheet_potential', 'value_score', 'consistency_score',
        'games_played', 'clean_sheet_rate', 'xCS_per_game', 'goals_conceded_per_game',
        'season_points', 'season_minutes', 'season_CS', 'season_xCS'
    ]
    
    return result[output_cols].head(top_n).round(3)

def filter_attackers_corrected(season_data: pd.DataFrame, min_games: int = 3, top_n: int = 20, positions: List[int] = [3, 4]) -> pd.DataFrame:
    """
    Rank attackers using cumulative season data.
    
    Args:
        season_data: DataFrame with cumulative season statistics
        min_games: Minimum games played to be considered
        top_n: Number of top attackers to return
        positions: List of position types to include (3=Midfielder, 4=Forward)
    
    Returns:
        DataFrame with ranked attackers based on season performance
    """
    # Filter for attackers
    attackers = season_data[season_data['element_type'].isin(positions)].copy()
    
    # Filter by minimum games
    attackers = attackers[attackers['games_played'] >= min_games]
    
    if len(attackers) == 0:
        return pd.DataFrame()
    
    # Calculate performance metrics
    attackers['goals_per_game'] = (attackers['season_goals'] / attackers['games_played']).fillna(0)
    attackers['assists_per_game'] = (attackers['season_assists'] / attackers['games_played']).fillna(0)
    attackers['xG_per_game'] = (attackers['season_xG'] / attackers['games_played']).fillna(0)
    attackers['xA_per_game'] = (attackers['season_xA'] / attackers['games_played']).fillna(0)
    attackers['shots_per_game'] = (attackers['season_shots'] / attackers['games_played']).fillna(0)
    attackers['SoT_per_game'] = (attackers['season_SoT'] / attackers['games_played']).fillna(0)
    attackers['SiB_per_game'] = (attackers['season_SiB'] / attackers['games_played']).fillna(0)
    attackers['minutes_per_game'] = attackers['season_minutes'] / attackers['games_played']
    
    # Attacking threat score
    attackers['attacking_threat'] = (
        attackers['xG_per_game'] * 0.3 +
        attackers['xA_per_game'] * 0.25 +
        attackers['goals_per_game'] * 0.2 +
        attackers['assists_per_game'] * 0.15 +
        attackers['SoT_per_game'] * 0.05 +
        attackers['SiB_per_game'] * 0.05
    )
    
    # Consistency score
    attackers['consistency_score'] = np.minimum(attackers['minutes_per_game'] / 90, 1)
    
    # Value score
    attackers['value_score'] = attackers['season_points'] / attackers['now_cost']
    
    # Overall attacker score
    attackers['attacker_score'] = (
        attackers['attacking_threat'] * 0.6 +
        attackers['value_score'] * 0.25 +
        attackers['consistency_score'] * 0.15
    )
    
    # Sort by attacker score
    result = attackers.sort_values('attacker_score', ascending=False)
    
    # Select key columns
    output_cols = [
        'web_name', 'team_name', 'position_name', 'now_cost', 'selected_by_percent',
        'attacker_score', 'attacking_threat', 'value_score', 'consistency_score',
        'games_played', 'goals_per_game', 'assists_per_game', 'xG_per_game', 'xA_per_game',
        'shots_per_game', 'SoT_per_game', 'season_points', 'season_minutes'
    ]
    
    return result[output_cols].head(top_n).round(3)

print("=== CORRECTED DEFENDER & ATTACKER FILTERING FUNCTIONS CREATED ===")
print("Functions: filter_defenders_corrected() & filter_attackers_corrected()")
print("Purpose: Rank players using CUMULATIVE season statistics")
print("\nKey Changes:")
print("- Uses season_stats dataframe with cumulative data")
print("- Changed min_minutes to min_games for more intuitive filtering")
print("- Calculates per-game averages from season totals")
print("- More accurate player performance assessment")

=== CORRECTED DEFENDER & ATTACKER FILTERING FUNCTIONS CREATED ===
Functions: filter_defenders_corrected() & filter_attackers_corrected()
Purpose: Rank players using CUMULATIVE season statistics

Key Changes:
- Uses season_stats dataframe with cumulative data
- Changed min_minutes to min_games for more intuitive filtering
- Calculates per-game averages from season totals
- More accurate player performance assessment


# 7️⃣ Feature 2 Ranking Leaderboard


In [7]:
# 🏆 ENHANCED TEAM STRENGTH RANKINGS
print("="*70)
print("📊 COMPREHENSIVE TEAM STRENGTH RANKINGS")
print("="*70)
print("💡 Enhanced with all available defensive metrics for accurate fixture assessment")

def create_comprehensive_team_strength_rankings(season_data: pd.DataFrame) -> pd.DataFrame:
    """
    Create comprehensive team strength rankings using all available defensive metrics.
    Enhanced calculation includes tackles, recoveries, clearances, and expected stats.
    """
    
    # ⚽ ATTACKING STRENGTH CALCULATION
    attacking_stats = season_data.groupby('team_name').agg({
        'season_goals': 'sum',
        'season_xG': 'sum', 
        'season_assists': 'sum',
        'season_xA': 'sum',
        'season_shots': 'sum',
        'season_SoT': 'sum',
        'season_key_passes': 'sum',
        'games_played': 'mean'
    }).round(3)
    
    # 🛡️ COMPREHENSIVE DEFENSIVE STRENGTH CALCULATION
    # Include all defensive players (GK + DEF)
    defensive_players = season_data[season_data['element_type'].isin([1, 2])]
    
    if len(defensive_players) == 0:
        print("⚠️ Warning: No defensive players found in dataset")
        defensive_stats = pd.DataFrame(index=attacking_stats.index)
        # Set default values for missing defensive data
        default_values = {
            'season_CS': 3.0, 'season_xCS': 3.0, 'season_GC': 1.5, 'season_xGC': 1.5,
            'season_tackles': 15.0, 'season_recoveries': 20.0, 
            'season_CBI': 10.0, 'season_defensive_contribution': 5.0,
            'games_played': attacking_stats['games_played'].iloc[0] if len(attacking_stats) > 0 else 6
        }
        for col, val in default_values.items():
            defensive_stats[col] = val
    else:
        # Aggregate all available defensive metrics
        agg_dict = {
            'games_played': 'mean'
        }
        
        # Add available defensive columns
        defensive_columns = ['season_CS', 'season_xCS', 'season_GC', 'season_xGC',
                           'season_tackles', 'season_recoveries', 
                           'season_clearances_blocks_interceptions', 'season_defensive_contribution']
        
        for col in defensive_columns:
            if col in defensive_players.columns:
                agg_dict[col] = 'mean'
        
        defensive_stats = defensive_players.groupby('team_name').agg(agg_dict).round(3)
        
        # Rename long column name for easier handling
        if 'season_clearances_blocks_interceptions' in defensive_stats.columns:
            defensive_stats.rename(columns={'season_clearances_blocks_interceptions': 'season_CBI'}, inplace=True)
    
    # 📊 CALCULATE PER-GAME METRICS
    
    # Attacking per-game metrics
    attacking_stats['goals_pg'] = attacking_stats['season_goals'] / attacking_stats['games_played']
    attacking_stats['xG_pg'] = attacking_stats['season_xG'] / attacking_stats['games_played']
    attacking_stats['assists_pg'] = attacking_stats['season_assists'] / attacking_stats['games_played']
    attacking_stats['xA_pg'] = attacking_stats['season_xA'] / attacking_stats['games_played']
    attacking_stats['shots_pg'] = attacking_stats['season_shots'] / attacking_stats['games_played']
    attacking_stats['key_passes_pg'] = attacking_stats['season_key_passes'] / attacking_stats['games_played']
    
    # Defensive per-game metrics
    defensive_stats['CS_rate'] = defensive_stats['season_CS'] / defensive_stats['games_played']
    defensive_stats['xCS_rate'] = defensive_stats['season_xCS'] / defensive_stats['games_played']
    defensive_stats['GC_pg'] = defensive_stats['season_GC'] / defensive_stats['games_played']
    defensive_stats['xGC_pg'] = defensive_stats['season_xGC'] / defensive_stats['games_played']
    
    if 'season_tackles' in defensive_stats.columns:
        defensive_stats['tackles_pg'] = defensive_stats['season_tackles'] / defensive_stats['games_played']
    if 'season_recoveries' in defensive_stats.columns:
        defensive_stats['recoveries_pg'] = defensive_stats['season_recoveries'] / defensive_stats['games_played']
    if 'season_CBI' in defensive_stats.columns:
        defensive_stats['CBI_pg'] = defensive_stats['season_CBI'] / defensive_stats['games_played']
    if 'season_defensive_contribution' in defensive_stats.columns:
        defensive_stats['def_contrib_pg'] = defensive_stats['season_defensive_contribution'] / defensive_stats['games_played']
    
    # 🎯 ENHANCED STRENGTH CALCULATIONS
    
    # Attack Strength (weighted combination of multiple metrics)
    attacking_stats['attack_strength'] = (
        attacking_stats['xG_pg'] * 0.25 +           # Expected goals (predictive)
        attacking_stats['goals_pg'] * 0.20 +        # Actual goals (results)
        attacking_stats['xA_pg'] * 0.20 +           # Expected assists (creativity)
        attacking_stats['assists_pg'] * 0.15 +      # Actual assists
        attacking_stats['shots_pg'] * 0.10 +        # Shot volume
        attacking_stats['key_passes_pg'] * 0.10     # Key passes (creativity)
    )
    
    # Comprehensive Defense Strength (using all available metrics)
    defense_components = []
    weights = []
    
    # Core defensive metrics (always available)
    defense_components.append(defensive_stats['CS_rate'])
    weights.append(0.25)  # Clean sheet rate
    
    defense_components.append(1 / (defensive_stats['GC_pg'] + 0.1))
    weights.append(0.20)  # Goals conceded (inverted)
    
    # Expected metrics (if available)
    if 'xCS_rate' in defensive_stats.columns:
        defense_components.append(defensive_stats['xCS_rate'])
        weights.append(0.15)  # Expected clean sheet rate
    
    if 'xGC_pg' in defensive_stats.columns:
        defense_components.append(1 / (defensive_stats['xGC_pg'] + 0.1))
        weights.append(0.15)  # Expected goals conceded (inverted)
    
    # 🎯 IMPROVED: Defensive actions with min-max normalization (more balanced)
    if 'tackles_pg' in defensive_stats.columns:
        tackles_norm = defensive_stats['tackles_pg'] / defensive_stats['tackles_pg'].max() if defensive_stats['tackles_pg'].max() > 0 else defensive_stats['tackles_pg']
        defense_components.append(tackles_norm)
        weights.append(0.10)
    
    if 'recoveries_pg' in defensive_stats.columns:
        recoveries_norm = defensive_stats['recoveries_pg'] / defensive_stats['recoveries_pg'].max() if defensive_stats['recoveries_pg'].max() > 0 else defensive_stats['recoveries_pg']
        defense_components.append(recoveries_norm)
        weights.append(0.05)
    
    if 'CBI_pg' in defensive_stats.columns:
        cbi_norm = defensive_stats['CBI_pg'] / defensive_stats['CBI_pg'].max() if defensive_stats['CBI_pg'].max() > 0 else defensive_stats['CBI_pg']
        defense_components.append(cbi_norm)
        weights.append(0.05)
    
    if 'def_contrib_pg' in defensive_stats.columns:
        def_contrib_norm = defensive_stats['def_contrib_pg'] / defensive_stats['def_contrib_pg'].max() if defensive_stats['def_contrib_pg'].max() > 0 else defensive_stats['def_contrib_pg']
        defense_components.append(def_contrib_norm)
        weights.append(0.05)
    
    # Normalize weights to sum to 1
    total_weight = sum(weights)
    weights = [w/total_weight for w in weights]
    
    # Calculate weighted defensive strength
    defensive_stats['defense_strength'] = sum(comp * weight for comp, weight in zip(defense_components, weights))
    
    # 🏆 COMBINE TEAM RANKINGS
    team_rankings = attacking_stats[['attack_strength']].join(
        defensive_stats[['defense_strength']], how='outer'
    )
    
    # 🔧 FIXED: Handle missing data (pandas 3.0 compatible)
    team_rankings = team_rankings.fillna({
        'attack_strength': team_rankings['attack_strength'].median(),
        'defense_strength': team_rankings['defense_strength'].median()
    })
    
    # Overall strength calculation
    team_rankings['overall_strength'] = (
        team_rankings['attack_strength'] * 0.6 + 
        team_rankings['defense_strength'] * 0.4
    )
    
    # Generate rankings
    team_rankings['attack_rank'] = team_rankings['attack_strength'].rank(ascending=False, method='dense').astype(int)
    team_rankings['defense_rank'] = team_rankings['defense_strength'].rank(ascending=False, method='dense').astype(int)
    team_rankings['overall_rank'] = team_rankings['overall_strength'].rank(ascending=False, method='dense').astype(int)
    
    return team_rankings.round(3)

# Generate comprehensive team rankings
team_rankings = create_comprehensive_team_strength_rankings(season_stats)
team_rankings_sorted = team_rankings.sort_values('overall_rank')

print("🏆 COMPREHENSIVE TEAM STRENGTH RANKINGS")
print("=" * 65)
print("📋 All Teams Ranked (Enhanced with Complete Defensive Analysis):")
print(team_rankings_sorted[['overall_rank', 'attack_rank', 'defense_rank', 
                           'overall_strength', 'attack_strength', 'defense_strength']].to_string())

print(f"\n⚽ TOP 8 ATTACKING TEAMS:")
attack_rankings = team_rankings.sort_values('attack_rank').head(8)
for idx, (team, data) in enumerate(attack_rankings.iterrows(), 1):
    print(f" {int(data['attack_rank']):2d}. {team:<15} (Attack: {data['attack_strength']:.3f})")

print(f"\n🛡️ TOP 8 DEFENSIVE TEAMS:")
defense_rankings = team_rankings.sort_values('defense_rank').head(8)
for idx, (team, data) in enumerate(defense_rankings.iterrows(), 1):
    print(f" {int(data['defense_rank']):2d}. {team:<15} (Defense: {data['defense_strength']:.3f})")

print(f"\n📊 DEFENSIVE STRENGTH CALCULATION INCLUDES:")
print("   • Clean Sheet Rate (25%)")
print("   • Goals Conceded Rate (20%)")
print("   • Expected Clean Sheets (15%)")
print("   • Expected Goals Conceded (15%)")
print("   • Tackles per Game (10%)")
print("   • Recoveries per Game (5%)")
print("   • Clearances/Blocks/Interceptions (5%)")
print("   • Defensive Contribution (5%)")
print("   📈 More comprehensive than previous CS + GC only calculation")

📊 COMPREHENSIVE TEAM STRENGTH RANKINGS
💡 Enhanced with all available defensive metrics for accurate fixture assessment


NameError: name 'season_stats' is not defined

In [106]:
# SOLUTION 3: Player Recommendations for Team Matchups
print("\n" + "="*80)
print("=== SOLUTION 3: PLAYER RECOMMENDATIONS BY OPPONENT STRENGTH ===")
print("Find best players against weak defenses/attacks")

def get_players_for_matchup(team: str, opponent_type: str, season_data: pd.DataFrame, 
                           team_rankings: pd.DataFrame, top_n: int = 8) -> pd.DataFrame:
    """
    Get player recommendations based on opponent strength.
    
    Args:
        team: Team name to get players from
        opponent_type: 'weak_defense' for attackers, 'weak_attack' for defenders
        season_data: Player season statistics
        team_rankings: Team strength rankings
        top_n: Number of players to return
    """
    team_players = season_data[season_data['team_name'] == team].copy()
    
    if len(team_players) == 0:
        return pd.DataFrame()
    
    if opponent_type == 'weak_defense':
        # Get attacking players when facing weak defenses
        attackers = team_players[team_players['element_type'].isin([3, 4])]  # MID + FWD
        attackers = attackers[attackers['games_played'] >= 3]
        
        if len(attackers) == 0:
            return pd.DataFrame()
            
        # Score based on attacking output and value
        attackers['matchup_score'] = (
            attackers['goals_per_game'] * 3 +
            attackers['assists_per_game'] * 2 +
            attackers['points_per_game'] * 0.5 +
            attackers['points_per_million'] * 0.3
        )
        
        result = attackers.sort_values('matchup_score', ascending=False).head(top_n)
        return result[['web_name', 'position_name', 'season_points', 'now_cost', 
                      'goals_per_game', 'assists_per_game', 'points_per_game', 
                      'points_per_million', 'matchup_score']].round(2)
        
    else:  # weak_attack
        # Get defensive players when facing weak attacks
        defenders = team_players[team_players['element_type'].isin([1, 2])]  # GK + DEF
        defenders = defenders[defenders['games_played'] >= 3]
        
        if len(defenders) == 0:
            return pd.DataFrame()
            
        # Score based on clean sheet potential and value
        defenders['clean_sheet_rate'] = defenders['season_CS'] / defenders['games_played']
        defenders['matchup_score'] = (
            defenders['clean_sheet_rate'] * 4 +
            defenders['points_per_game'] * 0.6 +
            defenders['points_per_million'] * 0.4
        )
        
        result = defenders.sort_values('matchup_score', ascending=False).head(top_n)
        return result[['web_name', 'position_name', 'season_points', 'now_cost',
                      'clean_sheet_rate', 'points_per_game', 'points_per_million', 
                      'matchup_score']].round(2)

# Find teams with weak defenses (good for attacking players)
weak_defenses = team_rankings.sort_values('defense_rank', ascending=False).head(8)
print("🎯 TEAMS WITH WEAK DEFENSES (Target for Attackers):")
print("=" * 55)
for team in weak_defenses.index:
    defense_rank = int(weak_defenses.loc[team, 'defense_rank'])
    defense_strength = weak_defenses.loc[team, 'defense_strength']
    print(f"{defense_rank:2d}. {team:<15} (Defense: {defense_strength:.3f})")

# Find teams with weak attacks (good for defenders)
weak_attacks = team_rankings.sort_values('attack_rank', ascending=False).head(8)
print(f"\n🛡️ TEAMS WITH WEAK ATTACKS (Good for Defenders):")
print("=" * 50)
for team in weak_attacks.index:
    attack_rank = int(weak_attacks.loc[team, 'attack_rank'])
    attack_strength = weak_attacks.loc[team, 'attack_strength']
    print(f"{attack_rank:2d}. {team:<15} (Attack: {attack_strength:.3f})")

# SHOW ALL TEAMS: Complete attacking rankings with player recommendations
print(f"\n⚽ ATTACKING PICKS FROM ALL TEAMS (Sorted by Attack Rank):")
print("=" * 60)
all_attacking_teams = team_rankings.sort_values('attack_rank')  # ALL teams sorted by attack rank

for idx, (team, data) in enumerate(all_attacking_teams.iterrows()):
    if team in season_stats['team_name'].values:
        attack_rank = int(data['attack_rank'])
        attack_strength = data['attack_strength']
        
        attackers = get_players_for_matchup(team, 'weak_defense', season_stats, team_rankings, 3)
        if not attackers.empty:
            print(f"\n🔴 {team} (#{attack_rank} Attack, Strength: {attack_strength:.3f}):")
            print(attackers[['web_name', 'position_name', 'now_cost', 'goals_per_game', 'assists_per_game', 'points_per_game']].to_string(index=False))
        else:
            print(f"\n🔴 {team} (#{attack_rank} Attack, Strength: {attack_strength:.3f}): No attacking players found")

# SHOW ALL TEAMS: Complete defensive rankings with player recommendations  
print(f"\n🛡️ DEFENSIVE PICKS FROM ALL TEAMS (Sorted by Defense Rank):")
print("=" * 60)
all_defensive_teams = team_rankings.sort_values('defense_rank')  # ALL teams sorted by defense rank

for idx, (team, data) in enumerate(all_defensive_teams.iterrows()):
    if team in season_stats['team_name'].values:
        defense_rank = int(data['defense_rank'])
        defense_strength = data['defense_strength']
        
        defenders = get_players_for_matchup(team, 'weak_attack', season_stats, team_rankings, 3)
        if not defenders.empty:
            print(f"\n🔵 {team} (#{defense_rank} Defense, Strength: {defense_strength:.3f}):")
            print(defenders[['web_name', 'position_name', 'now_cost', 'clean_sheet_rate', 'points_per_game', 'points_per_million']].to_string(index=False))
        else:
            print(f"\n🔵 {team} (#{defense_rank} Defense, Strength: {defense_strength:.3f}): No defensive players found")


=== SOLUTION 3: PLAYER RECOMMENDATIONS BY OPPONENT STRENGTH ===
Find best players against weak defenses/attacks
🎯 TEAMS WITH WEAK DEFENSES (Target for Attackers):
20. Burnley         (Defense: 0.318)
19. Nott'm Forest   (Defense: 0.347)
18. Man Utd         (Defense: 0.351)
17. Brighton        (Defense: 0.351)
16. West Ham        (Defense: 0.373)
15. Brentford       (Defense: 0.381)
14. Wolves          (Defense: 0.388)
13. Fulham          (Defense: 0.482)

🛡️ TEAMS WITH WEAK ATTACKS (Good for Defenders):
20. Burnley         (Attack: 2.134)
19. Wolves          (Attack: 2.382)
18. Brentford       (Attack: 2.437)
17. Aston Villa     (Attack: 2.474)
16. Sunderland      (Attack: 2.513)
15. Newcastle       (Attack: 2.535)
14. Leeds           (Attack: 2.559)
13. Fulham          (Attack: 2.593)

⚽ ATTACKING PICKS FROM ALL TEAMS (Sorted by Attack Rank):

🔴 Liverpool (#1 Attack, Strength: 4.032):
   web_name position_name  now_cost  goals_per_game  assists_per_game  points_per_game
Gravenberch  

# 🔮 ENHANCED FIXTURE ANALYZER - SEASON-WIDE ANALYSIS

**The next-generation fixture analysis system that provides:**
- ✅ **Complete season fixture analysis** (all gameweeks)
- ✅ **Visual difficulty heatmaps** 
- ✅ **Strategic transfer timing recommendations**
- ✅ **Advanced team matchup intelligence**
- ✅ **Position-specific fixture insights**

This enhanced system works alongside your existing analysis to provide deeper strategic insights for long-term FPL planning.

In [1]:
# =============================================================================
# 🔮 ENHANCED FIXTURE ANALYZER CLASS
# =============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

class EnhancedFixtureAnalyzer:
    """
    Advanced fixture analysis system for complete season planning
    
    Features:
    - Season-wide fixture difficulty analysis
    - Visual heatmaps and charts
    - Strategic transfer timing recommendations
    - Position-specific insights
    - Team matchup intelligence
    """
    
    def __init__(self, season_stats, team_rankings, fixtures_path='fixture_template.csv'):
        """Initialize with your existing data"""
        self.season_stats = season_stats
        self.team_rankings = team_rankings  # Use the SAME rankings as Basic System
        self.fixtures_df = pd.read_csv(fixtures_path)
        self._process_data()
        
    def _process_data(self):
        """Process the data and create team mappings"""
        # Map fixture team names to your existing team_rankings
        self._map_team_names()
        

    
    def _map_team_names(self):
        """Map fixture team names to season_stats team names"""
        fixture_teams = set(self.fixtures_df['home_team'].unique()) | set(self.fixtures_df['away_team'].unique())
        season_teams = set(self.season_stats['team_name'].unique())
        
        self.team_mapping = {}
        
        for fixture_team in fixture_teams:
            # Try exact match first
            if fixture_team in season_teams:
                self.team_mapping[fixture_team] = fixture_team
                continue
            
            # Try partial matching
            best_match = None
            for season_team in season_teams:
                if (fixture_team.lower().replace(' ', '') in season_team.lower().replace(' ', '') or
                    season_team.lower().replace(' ', '') in fixture_team.lower().replace(' ', '')):
                    best_match = season_team
                    break
            
            if best_match:
                self.team_mapping[fixture_team] = best_match
            else:
                # Create a default mapping for missing teams
                self.team_mapping[fixture_team] = fixture_team
                print(f"⚠️ Could not match '{fixture_team}' - using default mapping")
                
    def get_fixture_difficulty_matrix(self, start_gw=None, end_gw=None, home_advantage=0.7):
        """Create fixture difficulty matrix using BASIC SYSTEM calculation method"""
        if start_gw is None:
            start_gw = self.fixtures_df['gameweek'].min()
        if end_gw is None:
            end_gw = self.fixtures_df['gameweek'].max()
            
        fixtures_period = self.fixtures_df[
            (self.fixtures_df['gameweek'] >= start_gw) & 
            (self.fixtures_df['gameweek'] <= end_gw)
        ].copy()
        
        # Add difficulty scores using BASIC SYSTEM method
        difficulties = []
        total_teams = len(self.team_rankings)
        
        for _, fixture in fixtures_period.iterrows():
            home_team = self.team_mapping.get(fixture['home_team'], fixture['home_team'])
            away_team = self.team_mapping.get(fixture['away_team'], fixture['away_team'])
            
            if home_team in self.team_rankings.index and away_team in self.team_rankings.index:
                # Get team stats
                home_stats = self.team_rankings.loc[home_team]
                away_stats = self.team_rankings.loc[away_team]
                
                # ATTACKING CALCULATION (same as Basic System)
                home_attack_rank = int(home_stats['attack_rank'])
                away_defense_rank = int(away_stats['defense_rank'])
                
                # Apply home advantage (reduced from 1.0 to 0.7)
                original_home_attack = home_attack_rank
                if home_advantage > 0 and home_attack_rank > 1:
                    home_attack_rank = max(1, home_attack_rank - home_advantage)
                
                # Calculate favorability with scaling (same as Basic System)
                attack_rank_difference = away_defense_rank - home_attack_rank
                attack_difficulty = attack_rank_difference / total_teams * 10
                
                # DEFENSIVE CALCULATION (same as Basic System)
                home_defense_rank = int(home_stats['defense_rank'])
                away_attack_rank = int(away_stats['attack_rank'])
                
                # Apply home advantage
                original_home_defense = home_defense_rank
                if home_advantage > 0 and home_defense_rank > 1:
                    home_defense_rank = max(1, home_defense_rank - home_advantage)
                
                # Calculate favorability with scaling
                defense_rank_difference = away_attack_rank - home_defense_rank
                defense_difficulty = defense_rank_difference / total_teams * 10
                
                difficulties.append({
                    'gameweek': fixture['gameweek'],
                    'home_team': fixture['home_team'],
                    'away_team': fixture['away_team'],
                    'mapped_home': home_team,
                    'mapped_away': away_team,
                    'attack_difficulty': attack_difficulty,
                    'defense_difficulty': defense_difficulty,
                    'overall_difficulty': (attack_difficulty + defense_difficulty) / 2,
                    # Additional data for debugging
                    'home_attack_rank': home_attack_rank,
                    'away_defense_rank': away_defense_rank,
                    'home_defense_rank': home_defense_rank,
                    'away_attack_rank': away_attack_rank,
                    'attack_rank_diff': attack_rank_difference,
                    'defense_rank_diff': defense_rank_difference
                })
        
        return pd.DataFrame(difficulties)

print("✅ Enhanced Fixture Analyzer Class Loaded!")
print("📊 Ready to analyze complete season fixture data")

✅ Enhanced Fixture Analyzer Class Loaded!
📊 Ready to analyze complete season fixture data


In [108]:
# =============================================================================
# 🔮 ENHANCED ANALYZER METHODS - ANALYSIS FUNCTIONS
# =============================================================================

# Add analysis methods to the EnhancedFixtureAnalyzer class
def analyze_gameweek(self, gw):
    """Analyze specific gameweek with detailed insights - HOME & AWAY perspective"""
    gw_fixtures = self.fixtures_df[self.fixtures_df['gameweek'] == gw].copy()
    
    print(f"⚽ GAMEWEEK {gw} ENHANCED FIXTURE ANALYSIS")
    print("=" * 70)
    print(f"📅 {len(gw_fixtures)} fixtures scheduled")
    
    if gw_fixtures.empty:
        print("❌ No fixture data available for this gameweek")
        return
    
    # Get difficulty matrices for both home and away perspectives
    home_difficulty = self.get_fixture_difficulty_matrix(gw, gw)
    
    if home_difficulty.empty:
        print("❌ No difficulty data available for analysis")
        return
    
    # Categorize difficulty (using BASIC SYSTEM categories)
    def get_difficulty_text(score):
        if score >= 4.0: return "🟢 Very Easy"
        elif score >= 2.5: return "🟡 Easy" 
        elif score >= 1.0: return "🟠 Medium-Easy"
        elif score >= -0.5: return "⚪ Medium"
        elif score >= -2.0: return "🔴 Hard"
        else: return "⚫ Very Hard"
    
    def get_recommendation(score, team_name, position):
        if team_name:  # For quick picks section
            if score >= 2.5:
                return f"🔥 Target {team_name} {position}"
            elif score >= 1.0:
                return f"⭐ Consider {team_name} {position}"
            elif score <= -2.0:
                return f"❌ Avoid {team_name} {position}"
            else:
                return f"⚪ Average {team_name} {position}"
        else:  # For detailed display section
            if score >= 2.5:
                return "Strong Pick 🔥"
            elif score >= 1.0:
                return "Good Pick ⭐"
            elif score <= -2.0:
                return "Strong Avoid ❌"
            else:
                return "Average"
    
    for i, (_, fixture) in enumerate(home_difficulty.iterrows(), 1):
        home_team = fixture['home_team']
        away_team = fixture['away_team']
        
        # HOME TEAM perspective
        home_att_diff = fixture['attack_difficulty']
        home_def_diff = fixture['defense_difficulty']
        
        # AWAY TEAM perspective (calculate reverse fixture)
        away_difficulty = self.get_fixture_difficulty_matrix(gw, gw)
        # Find the away team perspective by looking up as if away team was home
        mapped_home = fixture['mapped_home']
        mapped_away = fixture['mapped_away']
        
        if mapped_away in self.team_rankings.index and mapped_home in self.team_rankings.index:
            # Calculate away team difficulty (as if they were home, but without home advantage)
            away_stats = self.team_rankings.loc[mapped_away]
            home_stats = self.team_rankings.loc[mapped_home]
            total_teams = len(self.team_rankings)
            
            # Away attacking (vs home defense)
            away_attack_rank = int(away_stats['attack_rank'])
            home_defense_rank = int(home_stats['defense_rank'])
            away_att_rank_diff = home_defense_rank - away_attack_rank
            away_att_diff = away_att_rank_diff / total_teams * 10
            
            # Away defending (vs home attack)
            away_defense_rank = int(away_stats['defense_rank'])
            home_attack_rank = int(home_stats['attack_rank'])
            away_def_rank_diff = home_attack_rank - away_defense_rank
            away_def_diff = away_def_rank_diff / total_teams * 10
        else:
            away_att_diff = 0
            away_def_diff = 0
        
        print(f"\n{i}. 🏟️ {home_team.upper()} vs {away_team.upper()}")
        print("-" * 60)
        
        # Display both teams side by side (like Basic System)
        print(f"🏠 {home_team.upper()[:12]:12} (HOME) |  ✈️  {away_team.upper()[:12]:12} (AWAY)")
        print("-" * 28 + "|" + "-" * 28)
        
        # Get team ranks for display
        if mapped_home in self.team_rankings.index and mapped_away in self.team_rankings.index:
            home_att_rank = int(self.team_rankings.loc[mapped_home, 'attack_rank'])
            home_def_rank = int(self.team_rankings.loc[mapped_home, 'defense_rank'])
            away_att_rank = int(self.team_rankings.loc[mapped_away, 'attack_rank'])
            away_def_rank = int(self.team_rankings.loc[mapped_away, 'defense_rank'])
            
            # Show attacking analysis with ranks (like Basic System)
            home_att_text = get_difficulty_text(home_att_diff)
            away_att_text = get_difficulty_text(away_att_diff)
            home_att_analysis = f"ATT#{home_att_rank} vs DEF#{away_def_rank}"
            away_att_analysis = f"ATT#{away_att_rank} vs DEF#{home_def_rank}"
            
            print(f"⚔️  {home_att_analysis:15} {home_att_text:10} | ⚔️  {away_att_analysis:15} {away_att_text:10}")
            print(f"   {get_recommendation(home_att_diff, '', 'attackers'):15} ({home_att_diff:+4.1f}) | {get_recommendation(away_att_diff, '', 'attackers'):15} ({away_att_diff:+4.1f})")
            
            # Show defensive analysis with ranks
            home_def_text = get_difficulty_text(home_def_diff)
            away_def_text = get_difficulty_text(away_def_diff)
            home_def_analysis = f"DEF#{home_def_rank} vs ATT#{away_att_rank}"
            away_def_analysis = f"DEF#{away_def_rank} vs ATT#{home_att_rank}"
            
            print(f"🛡️  {home_def_analysis:15} {home_def_text:10} | 🛡️  {away_def_analysis:15} {away_def_text:10}")
            print(f"   {get_recommendation(home_def_diff, '', 'defenders'):15} ({home_def_diff:+4.1f}) | {get_recommendation(away_def_diff, '', 'defenders'):15} ({away_def_diff:+4.1f})")
        else:
            # Fallback if ranks not available
            home_att_text = get_difficulty_text(home_att_diff)
            away_att_text = get_difficulty_text(away_att_diff)
            print(f"⚔️  {home_att_text:15} ({home_att_diff:+4.1f}) | ⚔️  {away_att_text:15} ({away_att_diff:+4.1f})")
            
            home_def_text = get_difficulty_text(home_def_diff)
            away_def_text = get_difficulty_text(away_def_diff)
            print(f"🛡️  {home_def_text:15} ({home_def_diff:+4.1f}) | 🛡️  {away_def_text:15} ({away_def_diff:+4.1f})")
        
        # Quick recommendations
        print(f"\n💡 QUICK PICKS:")
        
        # Best attacking opportunity
        if home_att_diff > away_att_diff:
            print(f"⚔️  ATTACK: {get_recommendation(home_att_diff, home_team, 'attackers')}")
        elif away_att_diff > home_att_diff:
            print(f"⚔️  ATTACK: {get_recommendation(away_att_diff, away_team, 'attackers')}")
        else:
            print(f"⚔️  ATTACK: Both teams similar - Average picks")
        
        # Best defensive opportunity
        if home_def_diff > away_def_diff:
            print(f"🛡️  DEFENSE: {get_recommendation(home_def_diff, home_team, 'defenders/GK')}")
        elif away_def_diff > home_def_diff:
            print(f"🛡️  DEFENSE: {get_recommendation(away_def_diff, away_team, 'defenders/GK')}")
        else:
            print(f"🛡️  DEFENSE: Both teams similar - Average picks")

def get_best_fixtures(self, position_type='attack', num_gameweeks=3):
    """Get best fixtures for next few gameweeks - considers both home and away teams"""
    current_gw = self.fixtures_df['gameweek'].min()
    end_gw = min(current_gw + num_gameweeks - 1, self.fixtures_df['gameweek'].max())
    
    difficulty_matrix = self.get_fixture_difficulty_matrix(current_gw, end_gw)
    
    if difficulty_matrix.empty:
        print("❌ No fixture data available")
        return
    
    # Create a list to store both home and away opportunities
    all_opportunities = []
    
    for _, fixture in difficulty_matrix.iterrows():
        home_team = fixture['home_team']
        away_team = fixture['away_team']
        gw = fixture['gameweek']
        mapped_home = fixture['mapped_home']
        mapped_away = fixture['mapped_away']
        
        if position_type == 'attack':
            # Home team attacking opportunity
            home_score = fixture['attack_difficulty']
            all_opportunities.append({
                'team': home_team,
                'opponent': away_team,
                'gameweek': gw,
                'score': home_score,
                'venue': 'H'
            })
            
            # Away team attacking opportunity (calculate separately)
            if mapped_away in self.team_rankings.index and mapped_home in self.team_rankings.index:
                away_stats = self.team_rankings.loc[mapped_away]
                home_stats = self.team_rankings.loc[mapped_home]
                total_teams = len(self.team_rankings)
                
                # Away attacking (vs home defense)
                away_attack_rank = int(away_stats['attack_rank'])
                home_defense_rank = int(home_stats['defense_rank'])
                away_att_rank_diff = home_defense_rank - away_attack_rank
                away_score = away_att_rank_diff / total_teams * 10
                
                all_opportunities.append({
                    'team': away_team,
                    'opponent': home_team,
                    'gameweek': gw,
                    'score': away_score,
                    'venue': 'A'
                })
        else:  # defense
            # Home team defensive opportunity
            home_score = fixture['defense_difficulty']
            all_opportunities.append({
                'team': home_team,
                'opponent': away_team,
                'gameweek': gw,
                'score': home_score,
                'venue': 'H'
            })
            
            # Away team defensive opportunity (calculate separately)
            if mapped_away in self.team_rankings.index and mapped_home in self.team_rankings.index:
                away_stats = self.team_rankings.loc[mapped_away]
                home_stats = self.team_rankings.loc[mapped_home]
                total_teams = len(self.team_rankings)
                
                # Away defending (vs home attack)
                away_defense_rank = int(away_stats['defense_rank'])
                home_attack_rank = int(home_stats['attack_rank'])
                away_def_rank_diff = home_attack_rank - away_defense_rank
                away_score = away_def_rank_diff / total_teams * 10
                
                all_opportunities.append({
                    'team': away_team,
                    'opponent': home_team,
                    'gameweek': gw,
                    'score': away_score,
                    'venue': 'A'
                })
    
    # Sort by score and take top 10
    all_opportunities.sort(key=lambda x: x['score'], reverse=True)
    best_opportunities = all_opportunities[:10]
    
    if position_type == 'attack':
        print(f"🎯 BEST ATTACKING FIXTURES (GW{current_gw}-{end_gw})")
    else:
        print(f"🛡️ BEST DEFENSIVE FIXTURES (GW{current_gw}-{end_gw})")
    
    print("=" * 60)
    
    if not best_opportunities:
        print("❌ No fixture data available")
        return
        
    for i, opp in enumerate(best_opportunities, 1):
        score = opp['score']
        gw = opp['gameweek']
        team = opp['team']
        opponent = opp['opponent']
        venue = opp['venue']
        
        # Format matchup to show which team we're considering
        if venue == 'H':
            matchup = f"{team} vs {opponent}"
        else:
            matchup = f"{opponent} vs {team}"
        
        # Add recommendation level (using BASIC SYSTEM thresholds)
        if score >= 4.0:
            level = "🔥 VERY EASY"
        elif score >= 2.5:
            level = "⭐ EASY"
        elif score >= 1.0:
            level = "✅ MEDIUM-EASY"
        elif score >= -0.5:
            level = "⚪ MEDIUM"
        elif score >= -2.0:
            level = "⚠️ HARD"
        else:
            level = "❌ VERY HARD"
        
        # Show which team has the opportunity
        venue_text = "(H)" if venue == 'H' else "(A)"
        print(f"{i:2d}. GW{gw:2d}: {matchup:<30} | {level} ({score:+.1f}) - {team} {venue_text}")

def create_team_difficulty_summary(self):
    """Create summary of difficulty for each team"""
    all_difficulties = self.get_fixture_difficulty_matrix()
    
    if all_difficulties.empty:
        print("❌ No fixture difficulty data available")
        return
    
    # Aggregate by team (home fixtures only to avoid double counting)
    team_summary = []
    
    fixture_teams = set(all_difficulties['home_team'].unique())
    
    for team in fixture_teams:
        home_fixtures = all_difficulties[all_difficulties['home_team'] == team]
        
        if len(home_fixtures) == 0:
            continue
            
        avg_attack_diff = home_fixtures['attack_difficulty'].mean()
        avg_defense_diff = home_fixtures['defense_difficulty'].mean()
        num_fixtures = len(home_fixtures)
        
        team_summary.append({
            'team': team,
            'avg_attack_difficulty': avg_attack_diff,
            'avg_defense_difficulty': avg_defense_diff,
            'num_fixtures': num_fixtures,
            'overall_difficulty': (avg_attack_diff + avg_defense_diff) / 2
        })
    
    if not team_summary:
        print("❌ No team summary data available")
        return
        
    summary_df = pd.DataFrame(team_summary).sort_values('overall_difficulty', ascending=False)
    
    print("🏆 TEAM FIXTURE DIFFICULTY SUMMARY")
    print("=" * 70)
    print("(Higher scores = easier fixtures, negative = harder fixtures)")
    print()
    
    for i, (_, team_data) in enumerate(summary_df.iterrows(), 1):
        team = team_data['team']
        att_diff = team_data['avg_attack_difficulty']
        def_diff = team_data['avg_defense_difficulty']
        overall = team_data['overall_difficulty']
        fixtures = int(team_data['num_fixtures'])
        
        # Add emoji indicators
        att_emoji = "🟢" if att_diff >= 1 else "🟡" if att_diff >= -1 else "🔴"
        def_emoji = "🟢" if def_diff >= 1 else "🟡" if def_diff >= -1 else "🔴"
        
        print(f"{i:2d}. {team:<20} | {att_emoji} ATT:{att_diff:+5.1f} | {def_emoji} DEF:{def_diff:+5.1f} | Overall:{overall:+5.1f} ({fixtures} fixtures)")

def get_transfer_recommendations(self, num_gameweeks=5):
    """Advanced transfer timing recommendations"""
    current_gw = self.fixtures_df['gameweek'].min()
    end_gw = min(current_gw + num_gameweeks - 1, self.fixtures_df['gameweek'].max())
    
    difficulty_matrix = self.get_fixture_difficulty_matrix(current_gw, end_gw)
    
    if difficulty_matrix.empty:
        print("❌ No data available for transfer recommendations")
        return
    
    print(f"📈 STRATEGIC TRANSFER RECOMMENDATIONS (GW{current_gw}-{end_gw})")
    print("=" * 70)
    
    # Teams to target (good fixtures)
    good_attack_teams = difficulty_matrix[difficulty_matrix['attack_difficulty'] >= 2]['home_team'].value_counts()
    good_defense_teams = difficulty_matrix[difficulty_matrix['defense_difficulty'] >= 2]['home_team'].value_counts()
    
    # Teams to avoid (bad fixtures)  
    bad_attack_teams = difficulty_matrix[difficulty_matrix['attack_difficulty'] <= -2]['home_team'].value_counts()
    bad_defense_teams = difficulty_matrix[difficulty_matrix['defense_difficulty'] <= -2]['home_team'].value_counts()
    
    print(f"\n🎯 TARGET TEAMS FOR TRANSFERS:")
    print("🔥 Attacking Assets:")
    if len(good_attack_teams) > 0:
        for team, count in good_attack_teams.head(5).items():
            print(f"   • {team} ({count} good fixtures)")
    else:
        print("   • No standout attacking opportunities")
        
    print("🛡️ Defensive Assets:")
    if len(good_defense_teams) > 0:
        for team, count in good_defense_teams.head(5).items():
            print(f"   • {team} ({count} good fixtures)")
    else:
        print("   • No standout defensive opportunities")
    
    print(f"\n❌ AVOID THESE TEAMS:")
    print("⚔️ Poor Attacking Fixtures:")
    if len(bad_attack_teams) > 0:
        for team, count in bad_attack_teams.head(5).items():
            print(f"   • {team} ({count} tough fixtures)")
    else:
        print("   • No teams to specifically avoid for attacking")
        
    print("🏰 Poor Defensive Fixtures:")
    if len(bad_defense_teams) > 0:
        for team, count in bad_defense_teams.head(5).items():
            print(f"   • {team} ({count} tough fixtures)")
    else:
        print("   • No teams to specifically avoid for defending")

# Attach methods to the class
EnhancedFixtureAnalyzer.analyze_gameweek = analyze_gameweek
EnhancedFixtureAnalyzer.get_best_fixtures = get_best_fixtures
EnhancedFixtureAnalyzer.create_team_difficulty_summary = create_team_difficulty_summary
EnhancedFixtureAnalyzer.get_transfer_recommendations = get_transfer_recommendations

print("✅ Enhanced Analysis Methods Added!")
print("🔧 Methods: analyze_gameweek, get_best_fixtures, create_team_difficulty_summary, get_transfer_recommendations")

✅ Enhanced Analysis Methods Added!
🔧 Methods: analyze_gameweek, get_best_fixtures, create_team_difficulty_summary, get_transfer_recommendations


In [4]:
# =============================================================================
# 🚀 INITIALIZE ENHANCED FIXTURE ANALYZER
# =============================================================================

print("🔮 INITIALIZING ENHANCED FIXTURE ANALYZER...")
print("=" * 60)

try:
    # Initialize analyzer with your existing data (using SAME team_rankings as Basic System)
    analyzer = EnhancedFixtureAnalyzer(season_stats, team_rankings, 'fixture_template.csv')
    
    print("✅ Analyzer initialized successfully!")
    print(f"📊 Fixture data loaded: {len(analyzer.fixtures_df)} fixtures")
    print(f"📅 Gameweeks available: {analyzer.fixtures_df['gameweek'].min()} to {analyzer.fixtures_df['gameweek'].max()}")
    print(f"🏟️ Teams mapped: {len(analyzer.team_mapping)} teams")
    
    # Check for any mapping issues
    missing_mappings = [team for team, mapped in analyzer.team_mapping.items() 
                       if mapped not in analyzer.team_rankings.index and mapped == team]
    
    if missing_mappings:
        print(f"⚠️ Teams without ranking data: {', '.join(missing_mappings[:5])}")
        print("   (These teams will be skipped in analysis)")
    else:
        print("✅ All teams successfully mapped to ranking data")
    
    print("\n🎯 ENHANCED FIXTURE ANALYZER READY!")
    print("Available methods:")
    print("• analyzer.analyze_gameweek(gw) - Deep dive into specific gameweek")
    print("• analyzer.get_best_fixtures('attack'/'defense', num_gw) - Find best opportunities")
    print("• analyzer.create_team_difficulty_summary() - Complete team overview")
    print("• analyzer.get_transfer_recommendations(num_gw) - Strategic transfer advice")
    
except Exception as e:
    print(f"❌ Error initializing analyzer: {e}")
    print("Please check that 'fixture_template.csv' exists and has the correct format")
    import traceback
    traceback.print_exc()

🔮 INITIALIZING ENHANCED FIXTURE ANALYZER...
❌ Error initializing analyzer: name 'season_stats' is not defined
Please check that 'fixture_template.csv' exists and has the correct format


Traceback (most recent call last):
  File "C:\Users\ITKOKM\AppData\Local\Temp\ipykernel_25440\3978810927.py", line 10, in <module>
    analyzer = EnhancedFixtureAnalyzer(season_stats, team_rankings, 'fixture_template.csv')
                                       ^^^^^^^^^^^^
NameError: name 'season_stats' is not defined


In [110]:
# =============================================================================
# 🎯 GAMEWEEK ANALYSIS DEMO
# =============================================================================

print("🎯 ENHANCED FIXTURE ANALYSIS - GAMEWEEK DEMO")
print("=" * 60)

# Analyze the first available gameweek
if 'analyzer' in locals():
    first_gw = analyzer.fixtures_df['gameweek'].min()
    print(f"📊 Analyzing Gameweek {first_gw} as demonstration...")
    print()
    
    analyzer.analyze_gameweek(first_gw)
    
    print(f"\n" + "="*60)
    print("💡 How to interpret the results:")
    print("🟢 Very Easy (3+) = Excellent opportunity, definitely target")
    print("🟡 Easy (1-3) = Good opportunity, consider targeting")  
    print("🟠 Medium (-1 to 1) = Average fixture, neutral")
    print("🔴 Hard (-3 to -1) = Difficult fixture, consider avoiding")
    print("⚫ Very Hard (-3+) = Very difficult, definitely avoid")
    
else:
    print("❌ Analyzer not initialized. Please run the initialization cell first.")

🎯 ENHANCED FIXTURE ANALYSIS - GAMEWEEK DEMO
📊 Analyzing Gameweek 7 as demonstration...

⚽ GAMEWEEK 7 ENHANCED FIXTURE ANALYSIS
📅 10 fixtures scheduled

1. 🏟️ BOURNEMOUTH vs FULHAM
------------------------------------------------------------
🏠 BOURNEMOUTH  (HOME) |  ✈️  FULHAM       (AWAY)
----------------------------|----------------------------
⚔️  ATT#6 vs DEF#13 🟡 Easy     | ⚔️  ATT#13 vs DEF#6 ⚫ Very Hard
   Strong Pick 🔥   (+3.9) | Strong Avoid ❌  (-3.5)
🛡️  DEF#6 vs ATT#13 🟡 Easy     | 🛡️  DEF#13 vs ATT#6 ⚫ Very Hard
   Strong Pick 🔥   (+3.9) | Strong Avoid ❌  (-3.5)

💡 QUICK PICKS:
⚔️  ATTACK: 🔥 Target Bournemouth attackers
🛡️  DEFENSE: 🔥 Target Bournemouth defenders/GK

2. 🏟️ LEEDS vs SPURS
------------------------------------------------------------
🏠 LEEDS        (HOME) |  ✈️  SPURS        (AWAY)
----------------------------|----------------------------
⚔️  ATT#14 vs DEF#4 ⚫ Very Hard | ⚔️  ATT#10 vs DEF#10 ⚪ Medium  
   Strong Avoid ❌  (-4.7) | Average         (+0.0)
🛡️  DEF

In [3]:
# =============================================================================
# 🔥 BEST FIXTURE OPPORTUNITIES
# =============================================================================

if 'analyzer' in locals():
    print("🔥 FINDING BEST FIXTURE OPPORTUNITIES")
    print("=" * 60)
    
    # Get best attacking fixtures for next 3 gameweeks
    print("⚔️ ATTACKING OPPORTUNITIES:")
    analyzer.get_best_fixtures('attack', 3)
    
    print(f"\n" + "="*60)
    
    # Get best defensive fixtures for next 3 gameweeks  
    print("🛡️ DEFENSIVE OPPORTUNITIES:")
    analyzer.get_best_fixtures('defense', 3)
    
    print(f"\n" + "="*60)
    print("💡 Strategy Tips:")
    print("• 🔥 EXCELLENT fixtures (3+): Strongly consider transfers to these teams")
    print("• ✅ GOOD fixtures (1-3): Solid options, good for captaincy")
    print("• ❌ AVOID fixtures (-1 or worse): Consider transferring out or benching")
    print("• Look for teams with multiple good fixtures for better value")
    
else:
    print("❌ Analyzer not initialized. Please run the initialization cell first.")

❌ Analyzer not initialized. Please run the initialization cell first.


In [112]:
# =============================================================================
# 📊 TEAM DIFFICULTY OVERVIEW & TRANSFER STRATEGY
# =============================================================================

if 'analyzer' in locals():
    print("📊 COMPLETE TEAM FIXTURE DIFFICULTY OVERVIEW")
    print("=" * 70)
    
    # Show team difficulty summary
    analyzer.create_team_difficulty_summary()
    
    print(f"\n" + "="*70)
    print("📈 STRATEGIC TRANSFER RECOMMENDATIONS")
    print("=" * 70)
    
    # Get transfer recommendations for next 5 gameweeks
    analyzer.get_transfer_recommendations(5)
    
    print(f"\n" + "="*70)
    print("🎯 HOW TO USE THIS ENHANCED ANALYSIS:")
    print("=" * 70)
    print("1. 📊 Use Team Overview to identify teams with consistently good/bad fixtures")
    print("2. 🎯 Use Transfer Recommendations to time your moves strategically")
    print("3. ⚔️ Target teams with multiple good attacking fixtures for forwards/mids")
    print("4. 🛡️ Target teams with multiple good defensive fixtures for defenders/GKs")
    print("5. 📅 Plan 3-5 gameweeks ahead instead of just the next gameweek")
    print("6. 🔄 Update fixture data regularly as new gameweeks become available")
    
    print(f"\n🎉 ENHANCED FIXTURE ANALYSIS COMPLETE!")
    print("This system provides strategic insights beyond basic fixture difficulty!")
    
else:
    print("❌ Analyzer not initialized. Please run the initialization cell first.")

📊 COMPLETE TEAM FIXTURE DIFFICULTY OVERVIEW
🏆 TEAM FIXTURE DIFFICULTY SUMMARY
(Higher scores = easier fixtures, negative = harder fixtures)

 1. Arsenal              | 🟢 ATT: +3.0 | 🟢 DEF: +5.4 | Overall: +4.2 (4 fixtures)
 2. Liverpool            | 🟢 ATT: +6.2 | 🟢 DEF: +1.8 | Overall: +4.0 (4 fixtures)
 3. Crystal Palace       | 🟢 ATT: +3.8 | 🟢 DEF: +4.0 | Overall: +3.9 (4 fixtures)
 4. Bournemouth          | 🟢 ATT: +4.2 | 🟢 DEF: +2.0 | Overall: +3.1 (5 fixtures)
 5. Spurs                | 🟢 ATT: +2.2 | 🟢 DEF: +3.9 | Overall: +3.0 (5 fixtures)
 6. Man City             | 🟢 ATT: +2.6 | 🟢 DEF: +1.4 | Overall: +2.0 (5 fixtures)
 7. Newcastle            | 🟡 ATT: -0.9 | 🟢 DEF: +4.3 | Overall: +1.7 (5 fixtures)
 8. Man Utd              | 🟢 ATT: +5.2 | 🔴 DEF: -2.8 | Overall: +1.2 (4 fixtures)
 9. Everton              | 🟡 ATT: +0.4 | 🟢 DEF: +1.2 | Overall: +0.8 (5 fixtures)
10. Chelsea              | 🟢 ATT: +1.4 | 🟡 DEF: -0.2 | Overall: +0.6 (4 fixtures)
11. Brighton             | 🟡 ATT: +0.4 

End == Checkpoint. Features Working


# 🌐 Frontend Integration - Data Export

## Export Data for Web Dashboard
This section exports all your analysis data to JSON files that the web frontend can consume via API.

In [113]:
# Import the data processor
import sys
import os
sys.path.append(os.path.join(os.getcwd(), 'backend'))

from data_processor import FPLDataProcessor

# Initialize the data processor
processor = FPLDataProcessor(data_dir="data")

# Export all data for the frontend
print("🚀 Exporting FPL data for web dashboard...")
print("=" * 50)

processor.process_and_export_all(
    df=df,                          # Original dataframe
    season_stats=season_stats,      # Season statistics  
    team_rankings=team_rankings,    # Team rankings
    analyzer=analyzer               # Enhanced analyzer
)

print("\n✅ Data export complete!")
print("🌐 Your web frontend can now access this data via the API")
print("📁 JSON files saved in: ./data/")

print("\n📊 ENHANCED DATA FEATURES:")
print("✅ Ownership % - from 'selected_by_percent' column")
print("✅ Form Score - calculated from last 5 gameweeks performance")
print("✅ All original analysis data included")

print("\n🔗 API endpoints will be available at:")
print("  - http://localhost:5000/api/players")
print("  - http://localhost:5000/api/teams") 
print("  - http://localhost:5000/api/fixtures")
print("  - http://localhost:5000/api/top-performers")
print("  - http://localhost:5000/api/hidden-gems")
print("  - http://localhost:5000/api/transfers")

🚀 Exporting FPL data for web dashboard...
🔄 Processing FPL data for export...
📊 Exported 758 players
🏆 Exported 20 teams
🔥 Exported top performers data
💎 Exported hidden gems and differentials
⚽ Exported 90 fixtures
📈 Exported transfer recommendations
✅ All data exported successfully!
📁 Files saved to: c:\Users\ITKOKM\Downloads\fpl\data

✅ Data export complete!
🌐 Your web frontend can now access this data via the API
📁 JSON files saved in: ./data/

📊 ENHANCED DATA FEATURES:
✅ Ownership % - from 'selected_by_percent' column
✅ Form Score - calculated from last 5 gameweeks performance
✅ All original analysis data included

🔗 API endpoints will be available at:
  - http://localhost:5000/api/players
  - http://localhost:5000/api/teams
  - http://localhost:5000/api/fixtures
  - http://localhost:5000/api/top-performers
  - http://localhost:5000/api/hidden-gems
  - http://localhost:5000/api/transfers
📊 Exported 758 players
🏆 Exported 20 teams
🔥 Exported top performers data
💎 Exported hidden ge