In [None]:
import requests
import pandas as pd
import numpy as np # Import numpy for sqrt calculation if needed

In [None]:
def get_fpl_data_enhanced():
    """
    Fetch player data from the Fantasy Premier League API as a DataFrame.

    Downloads the ``bootstrap-static`` payload, maps each player's team and
    position ids to their names, and derives ``form_points`` as
    sqrt(form * points), i.e. the geometric mean of the two values. Negative
    form or points are treated as 0 in that product so that two negative
    values cannot multiply into a positive score.

    Returns:
        pandas.DataFrame: One row per player with the columns
                          id, name, position, team, cost, form, points,
                          form_points, minutes, status.
                          Returns None if the API request fails or a
                          processing error occurs.
    """
    # The main FPL API endpoint for bootstrap static data
    url = "https://fantasy.premierleague.com/api/bootstrap-static/"
    print(f"Attempting to fetch data from: {url}")
    current_time_str = pd.Timestamp.now(tz='Africa/Nairobi').strftime('%Y-%m-%d %H:%M:%S %Z')
    print(f"Current time: {current_time_str}")

    try:
        response = requests.get(url, timeout=15)  # Bounded wait so a hung connection cannot stall the notebook
        response.raise_for_status()  # Raise for 4xx / 5xx responses
        print("Successfully connected to FPL API.")
    except requests.exceptions.Timeout:
        print("Error fetching data from FPL API: The request timed out.")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from FPL API: {e}")
        return None

    try:
        data = response.json()
        print("Successfully parsed JSON response.")

        # Player data ('elements'), team data ('teams') and position data ('element_types')
        players = data.get('elements')
        teams_data = data.get('teams')
        positions_data = data.get('element_types')

        if players is None or teams_data is None or positions_data is None:
            print("Error: Could not find 'elements', 'teams', or 'element_types' key in API response.")
            return None

        # id -> display name lookups
        team_map = {team['id']: team['name'] for team in teams_data}
        position_map = {pos['id']: pos['singular_name_short'] for pos in positions_data}
        print("Created team and position mappings.")

        player_list = [
            {
                'id': player.get('id'),
                'name': player.get('web_name'),
                'team': team_map.get(player.get('team'), 'Unknown'),
                'points': player.get('total_points'),
                # `or 0` also covers an explicit null, which .get()'s default would not
                'cost': (player.get('now_cost') or 0) / 10.0,
                'minutes': player.get('minutes'),
                'position': position_map.get(player.get('element_type'), 'Unknown'),
                'form': player.get('form', '0'),  # Default to '0' string if missing
                'status': player.get('status', 'u'),  # Default to 'u' if missing
            }
            for player in players
        ]

        fpl_df = pd.DataFrame(player_list)
        print(f"Successfully processed basic data for {len(fpl_df)} players.")

        # --- Data Type Conversion and Calculation ---
        # Coerce to numeric; anything unparseable becomes NaN and is then zeroed
        for col in ('form', 'points', 'minutes', 'cost'):
            fpl_df[col] = pd.to_numeric(fpl_df[col], errors='coerce').fillna(0)

        # Geometric mean of form and points. Clipping at 0 keeps the square
        # root real and stops negative * negative from producing a positive score.
        non_negative_product = fpl_df['form'].clip(lower=0) * fpl_df['points'].clip(lower=0)
        fpl_df['form_points'] = non_negative_product.pow(0.5).fillna(0)
        print("Calculated 'form_points' column.")

        # Present columns in a fixed, readable order
        cols_order = ['id', 'name', 'position', 'team', 'cost', 'form', 'points', 'form_points', 'minutes', 'status']
        cols_order = [col for col in cols_order if col in fpl_df.columns]
        fpl_df = fpl_df[cols_order]

        return fpl_df

    except KeyError as e:
        print(f"Error processing FPL data: Missing key {e}. Check API structure.")
        return None
    except ValueError as e:
        print(f"Error during data conversion: {e}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred during data processing: {e}")
        return None

In [None]:
# --- Main Execution ---
if __name__ == "__main__":
    # Fetch the enhanced FPL player data (None on any fetch/processing failure).
    fpl_player_df = get_fpl_data_enhanced()

    # A player must have MORE than this many minutes to appear in the views below.
    minutes = 480

    if fpl_player_df is not None:
        print(f"\n--- Full Enhanced FPL Player Data (Top 5 by form_points, >{minutes} mins) ---")
        # Filter the main frame by the minutes threshold, then sort for viewing
        fpl_player_df_view = fpl_player_df[fpl_player_df['minutes'] > minutes]
        print(fpl_player_df_view.sort_values(by='form_points', ascending=False).head())

        print("\n--- Original DataFrame Info ---")
        fpl_player_df.info()

        # --- Create Position-Specific DataFrames (Filtered and Sorted) ---
        print(f"\n--- Creating, Filtering (>{minutes} mins), and Sorting Position DataFrames by 'form_points' (Descending) ---")

        # One frame per position: same minutes filter, best form_points first.
        position_frames = {}
        for pos_code, pos_label in (('GKP', 'Goalkeepers'), ('DEF', 'Defenders'),
                                    ('MID', 'Midfielders'), ('FWD', 'Forwards')):
            is_eligible = (fpl_player_df['position'] == pos_code) & (fpl_player_df['minutes'] > minutes)
            pos_df = fpl_player_df[is_eligible].copy().sort_values(by='form_points', ascending=False)
            position_frames[pos_code] = pos_df
            print(f"\n--- {pos_label} DataFrame ({len(pos_df)} players) ---")
            print(pos_df.head())

        # Keep the individual names that the preview cells below rely on.
        df_gkp = position_frames['GKP']
        df_def = position_frames['DEF']
        df_mid = position_frames['MID']
        df_fwd = position_frames['FWD']

    else:
        print("\nFailed to retrieve or process FPL data.")

Attempting to fetch data from: https://fantasy.premierleague.com/api/bootstrap-static/
Current time: 2025-10-24 17:24:47 EAT
Successfully connected to FPL API.
Successfully parsed JSON response.
Created team and position mappings.
Successfully processed basic data for 745 players.
Calculated 'form_points' column.

--- Full Enhanced FPL Player Data (Top 5 by form_points, >120 mins) ---
      id      name position         team  cost  form  points  form_points  \
474  430   Haaland      FWD     Man City  14.7  12.3      83    31.951526   
134   82   Semenyo      MID  Bournemouth   8.1   9.7      70    26.057628   
4      5   Gabriel      DEF      Arsenal   6.4  11.3      59    25.820534   
632  694   Mukiele      DEF   Sunderland   4.1   8.7      44    19.565275   
631  683  Alderete      DEF   Sunderland   4.1   7.7      45    18.614510   

     minutes status  
474      683      a  
134      720      a  
4        720      d  
632      540      a  
631      667      i  

--- Original Dat

In [None]:
# Preview the first 10 goalkeepers; df_gkp was sorted by form_points (descending) when it was built.
df_gkp.head(10)

Unnamed: 0,id,name,position,team,cost,form,points,form_points,minutes,status
629,670,Roefs,GKP,Sunderland,4.6,6.3,48,17.389652,720,a
0,1,Raya,GKP,Arsenal,5.7,4.7,40,13.711309,720,a
520,469,Pope,GKP,Newcastle,5.2,3.7,43,12.613485,720,a
160,101,Kelleher,GKP,Brentford,4.5,5.7,27,12.405644,720,a
114,470,Dúbravka,GKP,Burnley,4.0,4.0,29,10.77033,720,a
119,67,Petrović,GKP,Bournemouth,4.5,2.7,29,8.848729,720,a
312,287,Pickford,GKP,Everton,5.5,2.3,32,8.579044,720,a
636,565,Vicario,GKP,Spurs,5.1,2.0,36,8.485281,720,a
239,220,Sánchez,GKP,Chelsea,4.9,2.7,24,8.049845,634,a
274,253,Henderson,GKP,Crystal Palace,5.0,2.0,28,7.483315,720,a


In [None]:
# Preview the first 15 defenders; df_def was sorted by form_points (descending) when it was built.
df_def.head(15)

Unnamed: 0,id,name,position,team,cost,form,points,form_points,minutes,status
4,5,Gabriel,DEF,Arsenal,6.4,11.3,59,25.820534,720,d
632,694,Mukiele,DEF,Sunderland,4.1,8.7,44,19.565275,540,a
631,683,Alderete,DEF,Sunderland,4.1,7.7,45,18.61451,667,i
7,8,J.Timber,DEF,Arsenal,6.0,6.3,54,18.444511,611,a
277,256,Muñoz,DEF,Crystal Palace,5.6,6.7,46,17.555626,720,a
244,225,James,DEF,Chelsea,5.5,8.3,37,17.524269,513,a
281,260,Guéhi,DEF,Crystal Palace,4.9,4.7,50,15.32971,720,a
595,532,Hume,DEF,Sunderland,4.5,6.0,37,14.899664,708,a
6,7,Calafiori,DEF,Arsenal,5.8,4.3,48,14.366628,614,a
526,476,Burn,DEF,Newcastle,5.1,4.3,45,13.910428,720,a


In [None]:
# Preview the first 10 midfielders; df_mid was sorted by form_points (descending) when it was built.
df_mid.head(10)

Unnamed: 0,id,name,position,team,cost,form,points,form_points,minutes,status
134,82,Semenyo,MID,Bournemouth,8.1,9.7,70,26.057628,720,a
19,21,Rice,MID,Arsenal,6.6,7.7,43,18.196153,632,a
287,267,Sarr,MID,Crystal Palace,6.5,6.7,38,15.95619,522,a
671,673,J.Palhinha,MID,Spurs,5.5,6.3,39,15.674821,603,a
653,582,Kudus,MID,Spurs,6.8,5.3,41,14.741099,708,a
628,668,Xhaka,MID,Sunderland,5.1,5.7,37,14.522396,720,a
479,119,Mbeumo,MID,Man Utd,8.0,5.7,36,14.324804,685,a
427,384,Gakpo,MID,Liverpool,7.5,5.0,40,14.142136,645,a
260,241,Caicedo,MID,Chelsea,5.9,4.3,46,14.064139,675,a
538,488,Bruno G.,MID,Newcastle,6.5,6.0,32,13.856406,625,a


In [None]:
# Preview the first 10 forwards; df_fwd was sorted by form_points (descending) when it was built.
df_fwd.head(10)

Unnamed: 0,id,name,position,team,cost,form,points,form_points,minutes,status
474,430,Haaland,FWD,Man City,14.7,12.3,83,31.951526,683,a
193,136,Thiago,FWD,Brentford,6.1,7.7,40,17.549929,651,a
303,283,Mateta,FWD,Crystal Palace,7.6,7.0,37,16.093477,690,a
700,624,Bowen,FWD,West Ham,7.8,4.3,37,12.613485,720,a
267,249,João Pedro,FWD,Chelsea,7.5,2.0,41,9.055385,673,a
66,64,Watkins,FWD,Aston Villa,8.6,3.7,21,8.81476,627,a
31,666,Gyökeres,FWD,Arsenal,9.0,2.0,28,7.483315,666,a
668,597,Richarlison,FWD,Spurs,6.6,1.3,33,6.549809,483,a
579,525,Wood,FWD,Nott'm Forest,7.2,1.7,25,6.519202,538,d
336,311,Beto,FWD,Everton,5.3,1.7,17,5.375872,497,a


In [None]:
# Make sure PuLP is installed
!pip install PuLP
import pulp
from pulp import LpProblem, LpMaximize, LpVariable, lpSum, LpBinary, PULP_CBC_CMD, LpStatus
import pandas as pd # Ensure pandas is imported if not already
import time # For potential delays if needed later
from collections import defaultdict # For potential use later

# --- Function 1: Select Optimal 15-Man Squad (Free Hit) ---
def select_optimal_team(player_data, budget=100.0, team_max=3):
    """
    Select the 15-man squad that maximizes total 'form_points' using PuLP.

    The squad must contain exactly 2 GKP, 5 DEF, 5 MID and 3 FWD, cost no more
    than `budget`, and include at most `team_max` players from any one team.

    Args:
        player_data (pandas.DataFrame): Candidate players, uniquely indexed,
            with 'cost', 'team', 'position' and 'form_points' columns. Rows
            missing any of these are ignored.
        budget (float): Maximum total squad cost. Defaults to 100.0.
        team_max (int): Maximum players allowed from a single team. Defaults to 3.

    Returns:
        pandas.DataFrame | None: The 15 selected rows of `player_data`
            (unsorted), or None if model building/solving raises or the solver
            does not report an optimal solution.
    """
    print("\n--- Starting Optimal 15-Man Squad Selection (using PuLP) ---")

    try:
        # Drop rows the model cannot use; a NaN form_points would otherwise
        # become a NaN objective coefficient.
        player_data = player_data.dropna(subset=['cost', 'team', 'position', 'form_points'])
        players = player_data.to_dict('index')
        player_indices = list(players.keys())
        teams = player_data['team'].unique()

        position_max = {'GKP': 2, 'DEF': 5, 'MID': 5, 'FWD': 3}

        prob = LpProblem("FPL_Optimal_Squad", LpMaximize)
        player_vars = LpVariable.dicts("Player", player_indices, 0, 1, LpBinary)

        # Objective: maximize summed form_points of the selected players
        prob += lpSum([players[i]['form_points'] * player_vars[i] for i in player_indices]), "Total_Form_Points"

        # --- Constraints ---
        prob += lpSum([players[i]['cost'] * player_vars[i] for i in player_indices]) <= budget, "Total_Cost"
        prob += lpSum([player_vars[i] for i in player_indices]) == 15, "Total_Players"
        for pos, required in position_max.items():
            prob += lpSum([player_vars[i] for i in player_indices if players[i]['position'] == pos]) == required, f"Total_{pos}"
        for team in teams:
            # Basic cleaning so the team name is usable inside a constraint name
            safe_team_name = str(team).replace(' ', '_').replace('&', '').replace('-', '_')
            prob += lpSum([player_vars[i] for i in player_indices if players[i]['team'] == team]) <= team_max, f"Total_in_{safe_team_name}"

        # --- Solve ---
        print("Solving 15-man squad problem...")
        prob.solve(PULP_CBC_CMD(msg=0))
        print(f"Status: {LpStatus[prob.status]}")

    except Exception as e:
        print(f"An error occurred during 15-man optimization: {e}")
        return None

    if LpStatus[prob.status] != 'Optimal':
        print(f"Warning: Optimal 15-man solution not found. Status: {LpStatus[prob.status]}")
        return None

    # Binary values come back as floats; threshold instead of comparing to 1,
    # and treat an unset (None) value as not selected.
    squad_indices = [i for i in player_indices if (player_vars[i].varValue or 0) > 0.9]
    optimal_squad_df = player_data.loc[squad_indices]

    print("\n*** Optimal 15-Man Squad Found! ***")
    print(f"Total Cost: £{optimal_squad_df['cost'].sum():.1f}m")
    print(f"Total Form Points (Maximized): {optimal_squad_df['form_points'].sum():.2f}")

    return optimal_squad_df  # Unsorted; callers sort if needed


# --- Function 2: Select Optimal Starting 11 ---
def select_starting_11(squad_df):
    """
    Select the starting 11 that maximizes 'form_points' from a 15-man squad.

    The lineup has exactly 1 GKP, 3-5 DEF, 2-5 MID and 1-3 FWD.

    Args:
        squad_df (pandas.DataFrame): Exactly 15 uniquely indexed rows with
            'position' and 'form_points' columns.

    Returns:
        tuple: (starting_11_df, bench_df). Starters are sorted by form_points
            descending; the bench is sorted by position, then form_points
            descending. Returns (None, None) if the input is not a 15-row
            DataFrame, the solver does not report an optimal solution, or an
            error occurs.
    """
    if not isinstance(squad_df, pd.DataFrame) or len(squad_df) != 15:
        # Only call len() on a DataFrame: None (e.g. a failed squad solve) has no len().
        received = f"{len(squad_df)} players" if isinstance(squad_df, pd.DataFrame) else type(squad_df).__name__
        print(f"Error: Input must be a 15-player DataFrame. Received: {received}.")
        return None, None

    print("\n--- Starting Optimal 11 Selection (using PuLP) ---")

    try:
        players = squad_df.to_dict('index')
        player_indices = list(players.keys())

        prob = LpProblem("FPL_Starting_11", LpMaximize)
        player_vars = LpVariable.dicts("Player_Starts", player_indices, 0, 1, LpBinary)

        # Objective: maximize summed form_points of the starters
        prob += lpSum([players[i]['form_points'] * player_vars[i] for i in player_indices]), "Total_Starting_Points"

        def starters_at(pos):
            """Linear expression counting the selected starters in position `pos`."""
            return lpSum([player_vars[i] for i in player_indices if players[i]['position'] == pos])

        # Constraints
        prob += lpSum([player_vars[i] for i in player_indices]) == 11, "Total_Starters"
        prob += starters_at('GKP') == 1, "GKP_Count"
        # Outfield formation bounds: (minimum, maximum) starters per position
        formation_bounds = {'DEF': (3, 5), 'MID': (2, 5), 'FWD': (1, 3)}
        for pos, (fewest, most) in formation_bounds.items():
            prob += starters_at(pos) >= fewest, f"Min_{pos}"
            prob += starters_at(pos) <= most, f"Max_{pos}"

        print("Solving for best 11...")
        prob.solve(PULP_CBC_CMD(msg=0))

        if LpStatus[prob.status] != 'Optimal':
            print("Warning: Optimal starting 11 not found.")
            return None, None

        starting_indices = []
        bench_indices = []
        for i in player_indices:
            # Solver values are floats; treat > 0.9 as "starts" and None as benched
            if (player_vars[i].varValue or 0) > 0.9:
                starting_indices.append(i)
            else:
                bench_indices.append(i)

        starting_11_df = squad_df.loc[starting_indices]
        bench_df = squad_df.loc[bench_indices]
        starting_total_form = starting_11_df['form_points'].sum()

        print("*** Optimal Starting 11 Found ***")
        print(f"Total Starting Form Points (Maximized): {starting_total_form}")

        # Starters: best form_points first
        starting_11_df = starting_11_df.sort_values(by='form_points', ascending=False)
        # Bench: by position name ascending, then best form_points first
        bench_df = bench_df.sort_values(by=['position', 'form_points'], ascending=[True, False])

        return starting_11_df, bench_df

    except Exception as e:
        print(f"An unexpected error occurred during 11-player selection: {e}")
        return None, None


# --- Function 3: Find Best Transfer Squad ---
def find_best_transfer_squad(current_squad_indices, num_transfers, budget, player_data):
    """
    Find the best 15-player squad reachable with exactly `num_transfers` transfers.

    Maximizes total 'form_points' subject to the budget, 2 GKP / 5 DEF /
    5 MID / 3 FWD, at most 3 players per team, and keeping exactly
    15 - num_transfers of the current squad.

    Args:
        current_squad_indices (list): Index labels (in `player_data`) of the
            current squad. Labels absent from `player_data` are ignored by the
            keep-count constraint.
        num_transfers (int): Number of players to replace.
        budget (float): Maximum total cost of the new squad.
        player_data (pandas.DataFrame): Uniquely indexed candidates with
            'form_points' and 'cost' columns plus 'GKP', 'DEF', 'MID', 'FWD'
            dummy columns. Team membership is read from the 'team' column when
            present; otherwise from per-team dummy columns named by the global
            `team_id_set`.

    Returns:
        tuple: (new_squad_indices, total_squad_form, transfers_in_indices,
            transfers_out_indices), or (None, 0, [], []) when the solver does
            not report an optimal solution.
    """
    all_player_indices = player_data.index.tolist()
    prob = LpProblem(f"Squad_Transfer_Opt_{num_transfers}", LpMaximize)

    player_vars = LpVariable.dicts("player", all_player_indices, cat='Binary')

    # Pull each column into a plain dict once; a .loc row lookup per player per
    # constraint builds a whole row Series every time.
    form_of = player_data['form_points'].to_dict()
    cost_of = player_data['cost'].to_dict()

    # Objective: maximize 'form_points'
    prob += lpSum([form_of[i] * player_vars[i] for i in all_player_indices]), "Total_Squad_FormPoints"

    # --- Constraints ---
    prob += lpSum([cost_of[i] * player_vars[i] for i in all_player_indices]) <= budget, "Total_Cost"
    prob += lpSum([player_vars[i] for i in all_player_indices]) == 15, "Total_Players"

    # Position counts, read from the 0/1 (or boolean) dummy columns
    for pos, required in (('GKP', 2), ('DEF', 5), ('MID', 5), ('FWD', 3)):
        in_position = player_data[pos].to_dict()
        prob += lpSum([player_vars[i] for i in all_player_indices if in_position[i] == 1]) == required, f"Num_{pos}"

    # Max 3 players per team
    if 'team' in player_data.columns:
        # Group variables by team straight from the data, so no notebook globals are needed
        team_of = player_data['team'].to_dict()
        members_by_team = {}
        for i in all_player_indices:
            if pd.isna(team_of[i]):
                continue  # no team recorded, so nothing to cap
            members_by_team.setdefault(team_of[i], []).append(player_vars[i])
    else:
        # Fallback: per-team dummy columns named by the global team_id_set
        members_by_team = {
            team_name: [player_vars[i] for i in all_player_indices if player_data.loc[i, team_name] == 1]
            for team_name in team_id_set
            if team_name in player_data.columns
        }
    for team_name, members in members_by_team.items():
        # Basic cleaning so the team name is usable inside a constraint name
        safe_team_name = str(team_name).replace(' ', '_').replace('&', '').replace('-', '_')
        prob += lpSum(members) <= 3, f"Max_Per_{safe_team_name}"

    # Transfer constraint: keep exactly (15 - num_transfers) current players
    players_to_keep = 15 - num_transfers
    prob += lpSum([player_vars[i] for i in current_squad_indices if i in player_vars]) == players_to_keep, "Num_Transfers"

    # --- Solve ---
    prob.solve(PULP_CBC_CMD(msg=0))

    if LpStatus[prob.status] != 'Optimal':
        print(f"Solver failed for {num_transfers} transfers. Status: {LpStatus[prob.status]}")
        return None, 0, [], []

    # Solver values are floats (e.g. 0.9999999); threshold rather than test == 1
    new_squad_indices = [i for i in all_player_indices if (player_vars[i].varValue or 0) > 0.9]
    total_squad_form = prob.objective.value()

    new_squad_set = set(new_squad_indices)
    current_squad_set = set(current_squad_indices)
    transfers_out_indices = [idx for idx in current_squad_indices if idx not in new_squad_set]
    transfers_in_indices = [idx for idx in new_squad_indices if idx not in current_squad_set]

    return new_squad_indices, total_squad_form, transfers_in_indices, transfers_out_indices

print("\nAll necessary PuLP functions defined/updated.")


All necessary PuLP functions defined/updated.


In [None]:
# --- 1. ENTER YOUR 15 PLAYER NAMES HERE ---
# Each entry must match the 'name' column produced by get_fpl_data_enhanced()
# (the API's 'web_name') exactly. The validation cell looks names up as-is and,
# when several players share a name, uses the first match, so check the
# printed index/cost for any common surname.
MY_SQUAD_NAMES = [
    # --- GK ---
    "Roefs", "Vicario",
    # --- DEF ---
    "Calafiori", "Senesi", "Guéhi", "Gudmundsson", "Chalobah",
    # --- MID ---
    "Saka", "Kudus", "Semenyo", "Gravenberch", "Cullen",
    # --- FWD ---
    "Haaland", "Gyökeres", "João Pedro"
]

# --- 2. ENTER YOUR TOTAL BUDGET HERE ---
# (Team Value + Bank)
# Passed as the spending cap to the transfer simulations.
MY_TOTAL_BUDGET = 99.6

# Echo the inputs so the saved output records what was analyzed.
print(f"My Squad: {MY_SQUAD_NAMES}")
print(f"My Budget: £{MY_TOTAL_BUDGET}m")

My Squad: ['Roefs', 'Vicario', 'Calafiori', 'Senesi', 'Guéhi', 'Gudmundsson', 'Chalobah', 'Saka', 'Kudus', 'Semenyo', 'Gravenberch', 'Cullen', 'Haaland', 'Gyökeres', 'João Pedro']
My Budget: £99.6m


In [None]:
# --- Prepare Data for Solver (Including Injury Penalty) ---
# Builds `df` (injury-adjusted copy of fpl_player_df with position/team dummy
# columns, indexed by player id when possible) and resolves MY_SQUAD_NAMES to
# index labels in `user_squad_indices`.

if 'fpl_player_df' in locals() and fpl_player_df is not None:
    # --- Prepare Base DataFrame 'df' ---
    df = fpl_player_df.copy()
    df['form_points'] = df['form_points'].fillna(0)
    df['cost'] = pd.to_numeric(df['cost'], errors='coerce')

    # --- Apply injury penalty ---
    unavailable_statuses = ['i', 's', 'u']  # Injured, Suspended, Unavailable
    doubtful_status = 'd'  # Doubtful

    df.loc[df['status'].isin(unavailable_statuses), 'form_points'] = 0.0
    df.loc[df['status'] == doubtful_status, 'form_points'] *= 0.5  # Halve points for doubtful
    print("Applied injury penalties to 'form_points'.")

    # --- Clean and Create Dummies ---
    df = df.dropna(subset=['cost', 'team', 'position', 'status'])

    # Position dummies
    pos_dummies = pd.get_dummies(df['position'])
    df = pd.concat([df, pos_dummies], axis=1)

    # Team dummies
    team_names = df['team'].unique().tolist()
    team_dummies = pd.get_dummies(df['team'])
    df = pd.concat([df, team_dummies], axis=1)

    # Set index: the solvers key their variables by index label, so it must be unique
    if not df.index.is_unique:
        print("Warning: DataFrame index is not unique. Using 'id' column as index.")
        if 'id' in df.columns and df['id'].is_unique:
            df = df.set_index('id')
        else:
            print("Error: Cannot set a unique index. Trying default integer index.")
            df = df.reset_index(drop=True)  # Fallback to integer index
    # Index is unique but is not 'id': prefer the player id as the label
    elif df.index.name != 'id' and 'id' in df.columns:
        print("Setting 'id' column as index.")
        df = df.set_index('id')

    # Ensure index is set correctly before proceeding
    if not df.index.is_unique:
        print("CRITICAL ERROR: Failed to create a unique index for the DataFrame. Cannot proceed.")
        df = None  # Prevent further execution

    if df is not None:
        # Store team names associated with the index for constraints
        team_id_map = df['team'].to_dict()  # Maps index -> team name
        team_id_set = set(team_id_map.values())  # Unique team names

        # --- Validate Manual Team Input ---
        user_squad_indices = []
        squad_cost = 0.0
        squad_form_total_pre_penalty = 0.0  # Form before penalty

        print("\n--- Validating Your Squad (Using injury-adjusted data) ---")
        # name -> index label. Built from the index values directly so it also
        # works when the index has no name (the integer-index fallback above).
        name_to_index_lookup = pd.Series(df.index.to_numpy(), index=df['name'].to_numpy())

        # Pre-penalty form_points keyed like df, built once instead of once per
        # squad member. None means the original frame cannot be matched by label.
        index_label = df.index.name
        if index_label is not None and index_label in fpl_player_df.columns:
            original_form_lookup = fpl_player_df.set_index(index_label)['form_points']
        elif index_label is not None and fpl_player_df.index.name == index_label:
            original_form_lookup = fpl_player_df['form_points']
        else:
            original_form_lookup = None

        for player_name in MY_SQUAD_NAMES:
            try:
                player_index = name_to_index_lookup.loc[player_name]
                # Several players share this name: take the first match, but say so
                if isinstance(player_index, pd.Series):
                    print(f"!!! WARNING: {len(player_index)} players are named '{player_name}'; using the first match (Idx: {player_index.iloc[0]}).")
                    player_index = player_index.iloc[0]

                player_cost = df.loc[player_index, 'cost']
                player_form_adjusted = df.loc[player_index, 'form_points']  # Adjusted form
                player_status = df.loc[player_index, 'status']
                # Original form points when the source frame can be matched, else the adjusted value
                if original_form_lookup is not None:
                    player_form_original = original_form_lookup.loc[player_index]
                else:
                    player_form_original = player_form_adjusted

                status_desc = {'a': 'Available','d': 'Doubtful','i': 'Injured','s': 'Suspended','u': 'Unavailable'}.get(player_status, f'Unknown ({player_status})')

                user_squad_indices.append(player_index)
                squad_cost += player_cost
                squad_form_total_pre_penalty += player_form_original

                print(f"Found: {player_name} (Idx: {player_index}, Cost: £{player_cost:.1f}m, Form(Adj): {player_form_adjusted:.2f}, Status: {status_desc})")

            except KeyError:
                print(f"!!! WARNING: Could not find player '{player_name}' in the prepared data. Skipping.")
            except Exception as e:
                print(f"!!! ERROR finding {player_name}: {e}. Skipping.")

        print(f"\nFound {len(user_squad_indices)} out of 15 players.")

        if len(user_squad_indices) != 15:
            print("--- ERROR: Your squad list is not complete or contains invalid names. ---")
        else:
            print(f"\nCurrent Squad Market Cost: £{squad_cost:.1f}m")
            print(f"Current Squad Total Form (Original): {squad_form_total_pre_penalty:.2f}")
            print(f"Your Total Budget: £{MY_TOTAL_BUDGET:.1f}m")

else:
    print("Cannot proceed. FPL player data ('fpl_player_df') not available or failed to load.")

Applied injury penalties to 'form_points'.
Setting 'id' column as index.

--- Validating Your Squad (Using injury-adjusted data) ---
Found: Roefs (Idx: 670, Cost: £4.6m, Form(Adj): 17.39, Status: Available)
Found: Vicario (Idx: 565, Cost: £5.1m, Form(Adj): 8.49, Status: Available)
Found: Calafiori (Idx: 7, Cost: £5.8m, Form(Adj): 14.37, Status: Available)
Found: Senesi (Idx: 72, Cost: £5.0m, Form(Adj): 13.33, Status: Available)
Found: Guéhi (Idx: 260, Cost: £4.9m, Form(Adj): 15.33, Status: Available)
Found: Gudmundsson (Idx: 347, Cost: £4.0m, Form(Adj): 4.90, Status: Available)
Found: Chalobah (Idx: 226, Cost: £5.1m, Form(Adj): 6.40, Status: Available)
Found: Saka (Idx: 16, Cost: £10.0m, Form(Adj): 12.41, Status: Available)
Found: Kudus (Idx: 582, Cost: £6.8m, Form(Adj): 14.74, Status: Available)
Found: Semenyo (Idx: 82, Cost: £8.1m, Form(Adj): 26.06, Status: Available)
Found: Gravenberch (Idx: 390, Cost: £5.7m, Form(Adj): 6.24, Status: Doubtful)
Found: Cullen (Idx: 205, Cost: £5.0m, F

In [None]:
# --- Run Simulations ---
# Scores the current squad, the best 1- and 2-transfer squads, a Free Hit
# squad and Bench Boost totals, all on injury-adjusted form_points.

# Scenario label -> index labels of the chosen starters / bench
starting_lineups = {}
bench_lineups = {}


def _pick_lineup(squad_df, label):
    """Choose the best XI for `squad_df`, record it under `label`, and return
    (starting_df, bench_df, starting_total). Empty frames and 0 on failure."""
    xi_df, bench_df = select_starting_11(squad_df)
    if xi_df is None:
        xi_df, bench_df = pd.DataFrame(), pd.DataFrame()
    xi_total = xi_df['form_points'].sum() if not xi_df.empty else 0
    starting_lineups[label] = xi_df.index.tolist()
    bench_lineups[label] = bench_df.index.tolist()
    return xi_df, bench_df, xi_total


if 'df' in locals() and df is not None and len(user_squad_indices) == 15:
    print("\n--- Running Simulations (using injury-adjusted form_points) ---")

    # --- 1. Get Current Team Score ---
    print("Sim 1/5: Analyzing current team...")
    current_squad_df = df.loc[user_squad_indices]
    form_current_team_11_df, bench_current_df, form_current_11_total = _pick_lineup(current_squad_df, 'Current')

    # --- 2. Simulate 1 Free Transfer ---
    print("Sim 2/5: Simulating 1 Free Transfer...")
    squad_1ft_indices, squad_form_1ft, in_1ft_indices, out_1ft_indices = find_best_transfer_squad(
        user_squad_indices, 1, MY_TOTAL_BUDGET, df
    )
    if squad_1ft_indices:
        squad_1ft_df = df.loc[squad_1ft_indices]
        form_1ft_11_df, bench_1ft_df, form_1ft_11_total = _pick_lineup(squad_1ft_df, '1FT')
    else:
        print("WARNING: Could not find a valid 1-transfer solution.")
        form_1ft_11_total = 0.0
        squad_form_1ft = 0.0
        in_1ft_indices, out_1ft_indices = [], []
        starting_lineups['1FT'] = []
        bench_lineups['1FT'] = []

    # --- 3. Simulate 2 Transfers (-4 Hit) ---
    print("Sim 3/5: Simulating 2 Transfers (-4 hit)...")
    transfer_hit_cost = 4.0  # deduction applied to the two-transfer scenario
    squad_2ft_indices, squad_form_2ft, in_2ft_indices, out_2ft_indices = find_best_transfer_squad(
        user_squad_indices, 2, MY_TOTAL_BUDGET, df
    )
    if squad_2ft_indices:
        squad_2ft_df = df.loc[squad_2ft_indices]
        form_2ft_11_df, bench_2ft_df, form_2ft_11_total = _pick_lineup(squad_2ft_df, '2FT')
        form_2ft_hit = form_2ft_11_total - transfer_hit_cost
    else:
        print("WARNING: Could not find a valid 2-transfer solution.")
        # Define the same names as the success branch so later cells never hit a NameError
        form_2ft_11_total = 0.0
        squad_form_2ft = 0.0
        form_2ft_hit = 0.0
        in_2ft_indices, out_2ft_indices = [], []
        starting_lineups['2FT'] = []
        bench_lineups['2FT'] = []

    # --- 4. Simulate Free Hit ---
    print("Sim 4/5: Simulating 'Free Hit' chip...")
    squad_fh_df = select_optimal_team(df)  # Unconstrained-by-current-squad 15-man solver
    if squad_fh_df is not None:
        form_fh_11_df, bench_fh_df, form_fh_11_total = _pick_lineup(squad_fh_df, 'FreeHit')
    else:
        print("WARNING: Could not find a valid 'Free Hit' solution.")
        form_fh_11_total = 0.0
        starting_lineups['FreeHit'] = []
        bench_lineups['FreeHit'] = []

    # --- 5. Get Bench Boost Scores ---
    # Bench Boost counts all 15 players, so use whole-squad totals
    print("Sim 5/5: Calculating 'Bench Boost' scores...")
    form_bb_current = df.loc[user_squad_indices]['form_points'].sum()
    form_bb_1ft = squad_form_1ft  # From the 1FT squad simulation

    print("\n--- All Simulations Complete! ---")
else:
    print("Could not run simulations. Check FPL data loading, preparation, and team validation.")


--- Running Simulations (using injury-adjusted form_points) ---
Sim 1/5: Analyzing current team...

--- Starting Optimal 11 Selection (using PuLP) ---
Solving for best 11...
*** Optimal Starting 11 Found ***
Total Starting Form Points (Maximized): 175.22212888136164
Sim 2/5: Simulating 1 Free Transfer...

--- Starting Optimal 11 Selection (using PuLP) ---
Solving for best 11...
*** Optimal Starting 11 Found ***
Total Starting Form Points (Maximized): 185.288742882598
Sim 3/5: Simulating 2 Transfers (-4 hit)...

--- Starting Optimal 11 Selection (using PuLP) ---
Solving for best 11...
*** Optimal Starting 11 Found ***
Total Starting Form Points (Maximized): 195.79863310669356
Sim 4/5: Simulating 'Free Hit' chip...

--- Starting Optimal 15-Man Squad Selection (using PuLP) ---
Solving 15-man squad problem...
Status: Optimal

*** Optimal 15-Man Squad Found! ***
Total Cost: £97.0m
Total Form Points (Maximized): 275.69

--- Starting Optimal 11 Selection (using PuLP) ---
Solving for best 11.

In [None]:
# --- Final Recommendations ---
# Prints a report comparing the simulated scenarios (do nothing, 1 free transfer,
# 2 transfers with a -4 hit, Free Hit, Bench Boost) and names the best standard move.
# Everything read here (form_*_total, *_indices, bench_lineups, ...) is produced by
# the simulation cell; the guard below only checks that the simulations started.

if 'df' in locals() and df is not None and 'form_current_11_total' in locals():
    # A transfer scenario is only reported when it is genuinely feasible.
    # The 2-transfer simulation stores 0.0 in form_2ft_hit when it finds no squad,
    # which passes a bare "> -4.0" test, so the squad itself must be checked as well.
    has_1ft_option = form_1ft_11_total > 0
    has_2ft_option = bool(squad_2ft_indices) and form_2ft_hit > -4.0

    print("\n============================================")
    print(" FPL Transfer Advisor (Based on Injury-Adjusted Form)")
    print("============================================")

    # --- Base Score ---
    print("\n--- OPTION 1: DO NOTHING ---")
    print(f"Current Optimal 11 Form Points: {form_current_11_total:.2f}")

    # --- One Transfer ---
    print("\n--- OPTION 2: 1 FREE TRANSFER ---")
    if has_1ft_option:
        transfer_out_names = [df.loc[idx, 'name'] for idx in out_1ft_indices]
        transfer_in_names = [df.loc[idx, 'name'] for idx in in_1ft_indices]
        print(f"Recommended Transfer: {transfer_out_names} -> {transfer_in_names}")
        print(f"New Optimal 11 Form Points:     {form_1ft_11_total:.2f}")
        print(f"Net Gain vs. Current:           {form_1ft_11_total - form_current_11_total:.2f} points")
    else:
        print("No feasible 1FT solution found.")

    # --- Two Transfers ---
    print("\n--- OPTION 3: 2 TRANSFERS (-4 HIT) ---")
    if has_2ft_option:
        transfer_out_names_2 = [df.loc[idx, 'name'] for idx in out_2ft_indices]
        transfer_in_names_2 = [df.loc[idx, 'name'] for idx in in_2ft_indices]
        print(f"Recommended Transfers: {transfer_out_names_2} -> {transfer_in_names_2}")
        print(f"New Optimal 11 Form Points (after -4): {form_2ft_hit:.2f}")
        print(f"Net Gain vs. Current:                  {form_2ft_hit - form_current_11_total:.2f} points")
    else:
        print("No feasible 2FT solution found.")

    print("\n--------------------------------------------")
    print(" CHIP ADVICE")
    print("--------------------------------------------")

    # --- Bench Boost ---
    print("\n--- BENCH BOOST CHIP ---")
    print(f"Current Squad 'Bench Boost' Form: {form_bb_current:.2f}")
    print(f"Squad *after 1 FT* 'Bench Boost' Form: {form_bb_1ft:.2f}")

    # --- Free Hit ---
    print("\n--- FREE HIT CHIP ---")
    if form_fh_11_total > 0:
        print(f"Optimal 'Free Hit' Team Form Points (Starting 11): {form_fh_11_total:.2f}")
        best_feasible_1ft_form = form_1ft_11_total if has_1ft_option else form_current_11_total
        print(f"Net Gain vs. Best Feasible 1-FT Team: {form_fh_11_total - best_feasible_1ft_form:.2f} points")
    else:
        print("Could not calculate Free Hit score.")

    print("\n============================================")
    print(" FINAL RECOMMENDATION ")
    print("============================================")

    # --- Final Logic ---
    # Infeasible moves keep their key but score -inf, so they can never be picked
    # over "Do Nothing", which is always a candidate. On ties max() keeps the
    # earliest key, i.e. the move that needs the fewest transfers.
    infeasible_score = float('-inf')
    options = {
        "Do Nothing": form_current_11_total,
        "Make 1 Free Transfer": form_1ft_11_total if has_1ft_option else infeasible_score,
        "Take a -4 Hit (2 Transfers)": form_2ft_hit if has_2ft_option else infeasible_score,
    }

    best_move = max(options, key=options.get)
    best_form = options[best_move]

    print(f"Your best *standard* move is: **{best_move}** (Form Points: {best_form:.2f})")
    if best_move == "Make 1 Free Transfer":
        print(f"-> Transfer {transfer_out_names} OUT for {transfer_in_names}")
    elif best_move == "Take a -4 Hit (2 Transfers)":
        print(f"-> Transfer {transfer_out_names_2} OUT for {transfer_in_names_2}")

    # Chip advice is measured against the best standard move chosen above.
    best_standard_feasible_form = best_form

    # Define thresholds for chip recommendations (can be adjusted)
    FREE_HIT_GAIN_THRESHOLD = 6.0
    BENCH_BOOST_BENCH_POINTS_THRESHOLD = 15.0 # Check points ON THE BENCH after best feasible move

    # Pick the bench that belongs to the chosen move, falling back to the current bench.
    if best_move == "Make 1 Free Transfer" and squad_1ft_indices:
        bench_scenario = '1FT'
    elif best_move == "Take a -4 Hit (2 Transfers)" and squad_2ft_indices:
        bench_scenario = '2FT'
    else: # Do Nothing
        bench_scenario = 'Current'

    # Calculate bench points after best feasible move (0 when that bench is empty)
    bench_indices = bench_lineups.get(bench_scenario, [])
    bench_points_after_best_move = df.loc[bench_indices, 'form_points'].sum() if bench_indices else 0

    # Free Hit takes precedence over Bench Boost; at most one chip is suggested.
    free_hit_gain = form_fh_11_total - best_standard_feasible_form
    if form_fh_11_total > 0 and free_hit_gain > FREE_HIT_GAIN_THRESHOLD:
        print(f"\n**CHIP ALERT:** Using **Free Hit** could gain ~{free_hit_gain:.1f} points over your best standard move.")
    elif bench_points_after_best_move > BENCH_BOOST_BENCH_POINTS_THRESHOLD:
        print(f"\n**CHIP ALERT:** Your bench has ~{bench_points_after_best_move:.1f} points after your best move. Consider **Bench Boost**.")
    else:
        print("\nNo chips strongly recommended this week.")

else:
    print("Could not run recommendations. Check FPL data loading, preparation, and team validation.")


 FPL Transfer Advisor (Based on Injury-Adjusted Form)

--- OPTION 1: DO NOTHING ---
Current Optimal 11 Form Points: 175.22

--- OPTION 2: 1 FREE TRANSFER ---
Recommended Transfer: ['Gyökeres'] -> ['Thiago']
New Optimal 11 Form Points:     185.29
Net Gain vs. Current:           10.07 points

--- OPTION 3: 2 TRANSFERS (-4 HIT) ---
Recommended Transfers: ['Gudmundsson', 'Gyökeres'] -> ['Thiago', 'Mukiele']
New Optimal 11 Form Points (after -4): 191.80
Net Gain vs. Current:                  16.58 points

--------------------------------------------
 CHIP ADVICE
--------------------------------------------

--- BENCH BOOST CHIP ---
Current Squad 'Bench Boost' Form: 201.25
Squad *after 1 FT* 'Bench Boost' Form: 211.32

--- FREE HIT CHIP ---
Optimal 'Free Hit' Team Form Points (Starting 11): 216.23
Net Gain vs. Best Feasible 1-FT Team: 30.94 points

 FINAL RECOMMENDATION 
Your best *standard* move is: **Take a -4 Hit (2 Transfers)** (Form Points: 191.80)
-> Transfer ['Gudmundsson', 'Gyökeres

In [None]:
# --- Visualize Lineups ---

def display_lineup(title, starter_indices, bench_indices, player_data):
    """
    Prints the starting 11 and the bench for one simulated scenario.

    Args:
        title (str): Scenario label shown in the banner (e.g. "Current Team").
        starter_indices (list): Index labels of the starters in player_data.
            An empty list prints a "no valid lineup" notice and returns.
        bench_indices (list): Index labels of the bench players in player_data.
            May be empty, in which case a short notice replaces the bench table.
        player_data (pandas.DataFrame): Player table providing the columns
            'name', 'position', 'team', 'cost', 'form_points' and 'status'.
            It is not modified; the function works on copies.

    Returns:
        None. All output goes to stdout. Lookup or processing failures are
        reported as a printed message instead of being raised.
    """
    display_columns = ['name', 'position', 'team', 'cost', 'form_points', 'status_desc']
    status_map = {'a': 'Avail', 'd': 'Doubt', 'i': 'Inj', 's': 'Susp', 'u': 'Unav'}
    position_order = {'GKP': 1, 'DEF': 2, 'MID': 3, 'FWD': 4}

    print("\n" + "="*30)
    print(f" {title} Lineup")
    print("="*30)

    if not starter_indices:
        print("  -> No valid lineup found for this scenario.")
        return

    try:
        # Work on copies so the helper columns never leak into player_data
        starters_df = player_data.loc[starter_indices].copy()
        bench_df = player_data.loc[bench_indices].copy()

        for frame in (starters_df, bench_df):
            # Readable status; codes missing from status_map are shown as 'Unk'
            frame['status_desc'] = frame['status'].map(status_map).fillna('Unk')
            frame['pos_order'] = frame['position'].map(position_order)

        # Starters: by position (GKP, DEF, MID, FWD), then highest form first
        starters_df = starters_df.sort_values(by=['pos_order', 'form_points'], ascending=[True, False])

        print("\n--- Starting 11 ---")
        print(starters_df[display_columns].to_string(index=False))
        print(f"Total Starting Form: {starters_df['form_points'].sum():.2f}")

        print("\n--- Bench (Order: Sub1, Sub2, Sub3, SubGK) ---")
        if bench_df.empty:
            # to_string() on an empty frame prints "Empty DataFrame / Columns / Index",
            # which is noise in a report, so say it plainly instead.
            print("  -> No bench players for this scenario.")
            return

        # Bench: outfield players by form (highest first), goalkeepers always last.
        # pos_order only breaks ties on equal form; one multi-key sort keeps the
        # order deterministic.
        bench_df['is_gk'] = bench_df['position'] == 'GKP'
        bench_display_order = bench_df.sort_values(
            by=['is_gk', 'form_points', 'pos_order'],
            ascending=[True, False, True],
        )

        print(bench_display_order[display_columns].to_string(index=False, header=False))

    except KeyError as e:
        print(f"Error displaying lineup: Missing data or index issue - {e}")
    except Exception as e:
        # Deliberate best-effort: a display problem should not abort the notebook run
        print(f"An unexpected error occurred during display: {e}")


# --- Display the results ---
# Show one lineup per simulated scenario, but only when the simulation outputs
# and the player table are present in this namespace.
if 'starting_lineups' not in locals() or 'df' not in locals() or df is None:
    print("Could not display lineups. Ensure simulations ran correctly.")
else:
    display_lineup(
        title="Current Team",
        starter_indices=starting_lineups.get('Current', []),
        bench_indices=bench_lineups.get('Current', []),
        player_data=df,
    )
    display_lineup(
        title="After 1 Free Transfer",
        starter_indices=starting_lineups.get('1FT', []),
        bench_indices=bench_lineups.get('1FT', []),
        player_data=df,
    )
    display_lineup(
        title="After 2 Transfers (-4 Hit)",
        starter_indices=starting_lineups.get('2FT', []),
        bench_indices=bench_lineups.get('2FT', []),
        player_data=df,
    )
    display_lineup(
        title="Free Hit Team",
        starter_indices=starting_lineups.get('FreeHit', []),
        bench_indices=bench_lineups.get('FreeHit', []),
        player_data=df,
    )


 Current Team Lineup

--- Starting 11 ---
      name position           team  cost  form_points status_desc
     Roefs      GKP     Sunderland   4.6    17.389652       Avail
     Guéhi      DEF Crystal Palace   4.9    15.329710       Avail
 Calafiori      DEF        Arsenal   5.8    14.366628       Avail
    Senesi      DEF    Bournemouth   5.0    13.326665       Avail
   Semenyo      MID    Bournemouth   8.1    26.057628       Avail
     Kudus      MID          Spurs   6.8    14.741099       Avail
    Cullen      MID        Burnley   5.0    13.114877       Avail
      Saka      MID        Arsenal  10.0    12.405644       Avail
   Haaland      FWD       Man City  14.7    31.951526       Avail
João Pedro      FWD        Chelsea   7.5     9.055385       Avail
  Gyökeres      FWD        Arsenal   9.0     7.483315       Avail
Total Starting Form: 175.22

--- Bench (Order: Sub1, Sub2, Sub3, SubGK) ---
   Chalobah DEF   Chelsea 5.1 6.403124 Avail
Gravenberch MID Liverpool 5.7 6.244998 Doubt