# FPL Gameweek Analysis – Notebook

End-to-end workflow to:

### A:
    - Load imports 
    - Declare constants

### B:
    - Load FPL data from csv files
    - Sanity check

### C:
    - Add Position
    - Drop timestamps
    - Attach team data to players

- Build a per-player, per-gameweek feature table
- Train a Random Forest regression model to predict points
- Evaluate the model with a time-based validation
- Build a snapshot for a target gameweek and generate predictions
- Add consistency metrics & tiers
- Select a best XI via ILP (strategy tuned via prior experimentation)
- Suggest transfers and pick a starting XI from a given squad

**Assumptions:**

- `players.csv`, `teams.csv`, `player_history.csv`, `fixtures.csv` are in a `data/` folder inside the working directory.
- Column names broadly match the standard FPL export (some robustness is built in).
- You'll set `TARGET_GW` near the top of the notebook.


## A. Imports & Global Config

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

import pulp

# Default size for every matplotlib figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (10, 5)

# Seed handed to the random forest so repeated runs build the same model.
RANDOM_STATE = 42

TARGET_GW =19 # <-- Gameweek we are working to predict, change this as needed
# How many of the latest gameweeks are held out as the validation set.
N_VAL_GWS = 4


## B. Data Loading

Load the four core CSVs and do basic sanity checks.

In [None]:
# Paths to the four raw CSV exports, relative to the working directory.
players_path = "data/players.csv"
teams_path = "data/teams.csv"
history_path = "data/player_history.csv"
fixtures_path = "data/fixtures.csv"

# Read the tables in one pass; the path order matches the unpacking targets.
players, teams, history, fixtures = (
    pd.read_csv(csv_path)
    for csv_path in (players_path, teams_path, history_path, fixtures_path)
)

# Sanity check 1: row/column counts of every table.
print("\nShapes:")
for label, frame in (
    ("players:", players),
    ("teams:", teams),
    ("history:", history),
    ("fixtures:", fixtures),
):
    print(label, frame.shape)

# Sanity check 2: the column names available in each table.
print("\nColumns:")
for label, frame in (
    ("players: ", players),
    ("teams: ", teams),
    ("fixtures: ", fixtures),
    ("history: ", history),
):
    print(label, frame.columns.tolist())

## C. Normalise Players & Teams

We derive a `position_label` and attach team metadata to players.


In [None]:
def derive_position_label(players_df):
    """Return a copy of ``players_df`` with a normalised ``position_label`` column.

    The position is read from the first of ``position``, ``element_type`` or
    ``pos`` that exists in the frame.

    * Numeric columns are treated as codes: 1 -> GK, 2 -> DEF, 3 -> MID,
      4 -> FWD. Any other code becomes NaN.
    * Text columns are mapped through a small alias table (e.g. "GKP" -> "GK",
      "D" -> "DEF"); labels not in the table are kept unchanged.

    Args:
        players_df: Player table; it is not modified.

    Returns:
        A new DataFrame with the extra ``position_label`` column.

    Raises:
        ValueError: If none of the candidate position columns is present.
    """
    df = players_df.copy()
    pos_col = next(
        (cand for cand in ("position", "element_type", "pos") if cand in df.columns),
        None,
    )
    if pos_col is None:
        raise ValueError("No position column found in players.csv (expected one of: position, element_type, pos).")

    col_dtype = df[pos_col].dtype
    # pandas' dtype helpers accept extension dtypes (nullable Int64, string),
    # for which np.issubdtype raises TypeError. Booleans are excluded so that
    # only genuinely numeric codes take the code-mapping branch.
    is_code_column = (
        pd.api.types.is_numeric_dtype(col_dtype)
        and not pd.api.types.is_bool_dtype(col_dtype)
    )
    if is_code_column:
        mapping = {1: "GK", 2: "DEF", 3: "MID", 4: "FWD"}
        df["position_label"] = df[pos_col].map(mapping)
    else:
        mapping = {
            "GKP": "GK", "GK": "GK",
            "DEF": "DEF", "D": "DEF",
            "MID": "MID", "M": "MID",
            "FWD": "FWD", "F": "FWD",
        }
        # Unknown text labels fall back to the raw value instead of NaN.
        df["position_label"] = df[pos_col].map(mapping).fillna(df[pos_col])
    return df

players = derive_position_label(players)

# Give the team table's id columns distinct names before joining it to players.
teams_ren = teams.rename(columns={
    "id": "team_row_id",
    "fpl_team_id": "team_fpl_id"
})

# The players table may reference its team under any of these names.
team_key_candidates = ["team_id", "team", "team_fpl_id"]
team_key = next((cand for cand in team_key_candidates if cand in players.columns), None)
if team_key is None:
    raise ValueError("Could not find team reference column in players (expected team_id/team/team_fpl_id).")

# Join on the FPL team id when every player's team reference is found there;
# otherwise fall back to the team table's row id.
use_fpl_id = (
    "team_fpl_id" in teams_ren.columns
    and players[team_key].isin(teams_ren["team_fpl_id"]).all()
)
players_meta = players.merge(
    teams_ren,
    left_on=team_key,
    right_on="team_fpl_id" if use_fpl_id else "team_row_id",
    how="left"
)

players_meta.rename(columns={
    "name": "team_name",
    "short_name": "team_short_name",
    "strength": "team_strength"
}, inplace=True)

# Drop bookkeeping timestamps. They may be absent, or may carry pandas' default
# merge suffixes (_x/_y) when both source tables had them, so only the columns
# that actually exist are removed instead of raising KeyError.
timestamp_names = {
    f"{base}{suffix}"
    for base in ("updated_at", "created_at")
    for suffix in ("", "_x", "_y")
}
players_meta.drop(
    columns=[col for col in players_meta.columns if col in timestamp_names],
    inplace=True,
)

print("players_meta columns:")
print(players_meta.columns.tolist())
display(players_meta.head())


## D. Build Master Per-Player Per-GW Table

Join history to players & fixtures.

In [None]:
# The player id column can be named differently between exports; use the first match.
player_id_key_candidates = ["fpl_player_id", "element", "id"]
player_id_key = next(
    (name for name in player_id_key_candidates if name in players_meta.columns),
    None,
)
if player_id_key is None:
    raise ValueError("Could not find FPL player id column in players_meta (expected fpl_player_id/element/id).")

# One row per history record, enriched with player and team metadata.
master = history.merge(
    players_meta,
    left_on="player_id",
    right_on=player_id_key,
    how="left",
    suffixes=("", "_player"),
)

fix = fixtures.copy()

# Reshape fixtures to one row per (fixture, team): first the home side's view...
fix_long_home = fix.assign(
    team_fpl_id=fix["team_h"],
    opponent_fpl_id=fix["team_a"],
    is_home=True,
    fixture_difficulty=fix["difficulty_h"],
    goals_for=fix["team_h_score"],
    goals_against=fix["team_a_score"],
)

# ...then the away side's view of the same fixtures.
fix_long_away = fix.assign(
    team_fpl_id=fix["team_a"],
    opponent_fpl_id=fix["team_h"],
    is_home=False,
    fixture_difficulty=fix["difficulty_a"],
    goals_for=fix["team_a_score"],
    goals_against=fix["team_h_score"],
)

fix_long = pd.concat([fix_long_home, fix_long_away], ignore_index=True)
# Missing scores are replaced by 0.0 goals.
fix_long = fix_long.fillna({"goals_for": 0.0, "goals_against": 0.0})

if "gameweek" not in master.columns:
    raise ValueError("Expected 'gameweek' column in player_history.")

# Attach the fixture context of the player's team for that gameweek.
fixture_cols = [
    "event", "team_fpl_id", "opponent_fpl_id", "is_home",
    "fixture_difficulty", "goals_for", "goals_against",
]
master = master.merge(
    fix_long[fixture_cols],
    left_on=["gameweek", "team_fpl_id"],
    right_on=["event", "team_fpl_id"],
    how="left",
)
master = master.drop(columns=["event"])
# Numeric home/away flag (1.0 home, 0.0 away, NaN when no fixture matched).
master["is_home_int"] = master["is_home"].astype(float)

print("master shape:", master.shape)
display(master.head())


## E. Team-Level Rolling Features

In [None]:
#display(fix_long[fix_long["team_fpl_id"]==1])
# Goals scored/conceded per team and gameweek; several fixtures in one
# gameweek are summed into a single row.
team_gw = (
    fix_long
    .groupby(["team_fpl_id", "event"], as_index=False)
    .agg(
        goals_for=("goals_for", "sum"),
        goals_against=("goals_against", "sum"),
    )
    .rename(columns={"event": "gameweek"})
)

team_gw = team_gw.sort_values(["team_fpl_id", "gameweek"])


def _prior_3gw_mean(goals):
    """Mean over up to three PREVIOUS gameweeks of one team's goal series."""
    # shift(1) drops the current gameweek from the window: its goals are part
    # of the outcome being predicted and must not leak into the features.
    return goals.shift(1).rolling(3, min_periods=1).mean()


# NOTE(review): fix_long has missing scores filled with 0.0, so fixtures
# without a result count as 0-0 games in these averages - confirm that is
# acceptable for gameweeks beyond the first unplayed one.
goals_by_team = team_gw.groupby("team_fpl_id")
team_gw["goals_for_3gw"] = goals_by_team["goals_for"].transform(_prior_3gw_mean)
team_gw["goals_against_3gw"] = goals_by_team["goals_against"].transform(_prior_3gw_mean)

# Recent scoring/conceding rate of the player's own team.
master = master.merge(
    team_gw[["team_fpl_id", "gameweek", "goals_for_3gw", "goals_against_3gw"]],
    on=["team_fpl_id", "gameweek"],
    how="left"
)

# The same table, re-keyed so it can be joined on the opponent.
opp_gw = team_gw.rename(columns={
    "team_fpl_id": "opponent_fpl_id",
    "goals_for_3gw": "opp_goals_for_3gw",
    "goals_against_3gw": "opp_goals_against_3gw"
})

master = master.merge(
    opp_gw[["opponent_fpl_id", "gameweek", "opp_goals_for_3gw", "opp_goals_against_3gw"]],
    on=["opponent_fpl_id", "gameweek"],
    how="left"
)

print("master with team features shape:", master.shape)
display(master.head())


## F. Player-Level Rolling Features & Form

In [None]:
# Order rows by gameweek within each player: the rolling, lag and expanding
# features computed next are evaluated in row order.
master = master.sort_values(["player_id", "gameweek"])

def add_player_rolling_features(df):
    """Add per-player form features built only from EARLIER rows.

    Rows must already be ordered by gameweek within each ``player_id``; the
    windows are evaluated in row order. Every feature for a row is computed
    from that player's previous rows only, so the row's own ``total_points``
    (the prediction target), ``minutes`` and ``ict_index`` never leak into
    its features. A player's first row therefore has NaN for all of them.

    Columns added:
        roll_points_3wk / roll_points_5wk: mean ``total_points`` over up to
            the previous 3 / 5 rows.
        form_custom: ``0.62 * roll_points_3wk + 0.38 * roll_points_5wk``.
        roll_minutes_3wk: mean ``minutes`` over up to the previous 3 rows.
        lag1_points / lag1_minutes: value from the previous row.
        ict_index_season: mean ``ict_index`` over all previous rows, or NaN
            when the frame has no ``ict_index`` column.

    Args:
        df: Frame with ``player_id``, ``total_points`` and ``minutes``
            columns; it is not modified.

    Returns:
        A copy of ``df`` with the feature columns added.
    """
    df = df.copy()
    grp = df.groupby("player_id")

    def prior_mean(column, window):
        # shift(1) first, so the window ends at the previous row.
        return grp[column].transform(
            lambda s: s.shift(1).rolling(window, min_periods=1).mean()
        )

    df["roll_points_3wk"] = prior_mean("total_points", 3)
    df["roll_points_5wk"] = prior_mean("total_points", 5)
    df["form_custom"] = 0.62 * df["roll_points_3wk"] + 0.38 * df["roll_points_5wk"]
    df["roll_minutes_3wk"] = prior_mean("minutes", 3)
    df["lag1_points"] = grp["total_points"].shift(1)
    df["lag1_minutes"] = grp["minutes"].shift(1)
    if "ict_index" in df.columns:
        df["ict_index_season"] = grp["ict_index"].transform(
            lambda s: s.shift(1).expanding().mean()
        )
    else:
        df["ict_index_season"] = np.nan
    return df

master = add_player_rolling_features(master)

# Price feature: now_cost divided by 10 (NaN when the column is missing).
if "now_cost" in master.columns:
    master["now_cost_m"] = master["now_cost"] / 10.0
else:
    master["now_cost_m"] = np.nan

# Strength gap between the player's team and the opponent.
# teams_ren still carries the raw "strength" column: the rename to
# "team_strength" was applied to players_meta only. Looking for
# "team_strength" alone in teams_ren therefore never matched and left this
# feature entirely NaN, so both spellings are accepted here.
strength_col = next(
    (col for col in ("team_strength", "strength") if col in teams_ren.columns),
    None,
)
can_compare_strength = (
    strength_col is not None
    and "team_fpl_id" in teams_ren.columns
    and "team_strength" in master.columns
    and "opponent_fpl_id" in master.columns
)
if can_compare_strength:
    opp_strength = (
        teams_ren[["team_fpl_id", strength_col]]
        .rename(columns={
            "team_fpl_id": "opponent_fpl_id",
            strength_col: "opp_team_strength",
        })
        # One row per team id so the merge cannot multiply master rows.
        .drop_duplicates("opponent_fpl_id")
    )
    master = master.merge(opp_strength, on="opponent_fpl_id", how="left")
    master["team_vs_opp_strength"] = master["team_strength"] - master["opp_team_strength"]
else:
    opp_strength = None
    master["team_vs_opp_strength"] = np.nan

print("master with rolling features shape:", master.shape)
display(master.head(20))


master.to_csv("data/master_data.csv", index=False)

## G. Modeling Dataset & Time-Based Split

In [None]:
target_col = "total_points"

# Numeric model inputs, in the column order used for every feature matrix.
# NOTE(review): `minutes` looks like the value recorded for the same gameweek
# as the target - confirm it is actually known at prediction time.
numeric_features = [
    "form_custom",
    "roll_points_3wk",
    "roll_points_5wk",
    "roll_minutes_3wk",
    "lag1_points",
    "lag1_minutes",
    "minutes",
    "fixture_difficulty",
    "goals_for_3gw",
    "goals_against_3gw",
    "opp_goals_for_3gw",
    "opp_goals_against_3gw",
    "ict_index_season",
    "now_cost_m",
    "team_vs_opp_strength",
    "is_home_int",
]
cat_features = ["position_label"]

# Keep rows with a positive 5-gameweek points mean and a known target.
# Alternative kept for experimentation: model_df = master.copy()
model_df = (
    master.loc[master["roll_points_5wk"] > 0]
    .sort_values(["player_id", "gameweek"])
    .dropna(subset=[target_col])
)

# Identifier columns (where present) + available numeric features + categoricals,
# de-duplicated while preserving order.
core_cols = [
    col
    for col in ["player_id", "player_name", "team_short_name", "gameweek", target_col]
    if col in model_df.columns
]
available_numeric = [col for col in numeric_features if col in model_df.columns]
cols_needed = list(dict.fromkeys(core_cols + available_numeric + cat_features))
model_df = model_df[cols_needed].copy()

print("model_df shape:", model_df.shape)
display(model_df.head())

# Time-based split: validate on the latest N_VAL_GWS gameweeks, or on the
# later half when there are not more than N_VAL_GWS gameweeks in total.
all_gws = sorted(model_df["gameweek"].unique())
if len(all_gws) > N_VAL_GWS:
    val_gws = all_gws[-N_VAL_GWS:]
else:
    val_gws = all_gws[len(all_gws) // 2:]
train_gws = [gw for gw in all_gws if gw not in val_gws]

print("Train GWs:", train_gws)
print("Val GWs:", val_gws)

train_mask = model_df["gameweek"].isin(train_gws)
val_mask = model_df["gameweek"].isin(val_gws)


def _frames_for(mask):
    """Return (numeric features, one-hot categoricals, target array) for a row mask."""
    numeric_part = model_df.loc[mask, numeric_features].fillna(0.0)
    dummy_part = pd.get_dummies(model_df.loc[mask, cat_features], drop_first=False)
    return numeric_part, dummy_part, model_df.loc[mask, target_col].values


X_num_train, X_cat_train, y_train = _frames_for(train_mask)
X_train = pd.concat([X_num_train, X_cat_train], axis=1)

X_num_val, X_cat_val, y_val = _frames_for(val_mask)
# Give the validation dummies exactly the training dummy columns.
X_cat_val = X_cat_val.reindex(columns=X_cat_train.columns, fill_value=0)
X_val = pd.concat([X_num_val, X_cat_val], axis=1)

print("Train X shape:", X_train.shape, "Val X shape:", X_val.shape)


## H. Baseline & Random Forest Models

In [None]:
# Naive benchmark: use each validation row's blended form as its prediction.
val_form = model_df.loc[val_mask, "form_custom"]
baseline_val_pred = val_form.fillna(0.0).to_numpy()

# Random forest trained on every feature column of the training window.
rf_settings = {
    "n_estimators": 300,
    "random_state": RANDOM_STATE,
    "n_jobs": -1,
}
rf_full = RandomForestRegressor(**rf_settings)
rf_full.fit(X_train, y_train)
y_val_pred_rf = rf_full.predict(X_val)

def print_metrics(name, y_true, y_pred):
    """Print the MAE and R² of ``y_pred`` against ``y_true`` on one labelled line."""
    abs_err, fit = mean_absolute_error(y_true, y_pred), r2_score(y_true, y_pred)
    print(f"{name:20s} MAE={abs_err:6.3f} | R²={fit:6.3f}")

print("Validation performance (target = total_points):")
for model_label, val_pred in (
    ("Baseline (form)", baseline_val_pred),
    ("RF full", y_val_pred_rf),
):
    print_metrics(model_label, y_val, val_pred)

# Feature importances of the fitted forest, largest first.
fi = pd.DataFrame(
    {"feature": X_train.columns, "importance": rf_full.feature_importances_}
)
fi = fi.sort_values("importance", ascending=False)

print("\nTop RF feature importances:")
display(fi.head(20))


## I. Consistency Metrics

In [None]:
cons_cols = ["player_id", "web_name", "team_short_name", "position_label", "total_points", "minutes"]
missing = [col for col in cons_cols if col not in master.columns]
if missing:
    raise ValueError(f"Missing columns in master for consistency calc: {missing}")

cons_df = master[cons_cols].copy()

# Per-player summary over every row in master: mean/std of points, mean minutes.
cons_keys = ["player_id", "web_name", "team_short_name", "position_label"]
cons_stats = {
    "points_mean": ("total_points", "mean"),
    "points_std": ("total_points", "std"),
    "minutes_mean": ("minutes", "mean"),
}
player_cons_all = cons_df.groupby(cons_keys).agg(**cons_stats).reset_index()

# A missing std (e.g. a single observation) is treated as zero spread.
player_cons_all["points_std"] = player_cons_all["points_std"].fillna(0.0)
# Higher mean and lower spread both raise the score.
spread_penalty = 1.0 + player_cons_all["points_std"]
player_cons_all["consistency_score"] = player_cons_all["points_mean"] / spread_penalty

print("Player-level consistency sample:")
display(player_cons_all.head(10))


## J. Snapshot for Target GW & Predictions

In [None]:
# Rows of the modelling table that belong to the gameweek being predicted.
snap_mask = model_df["gameweek"] == TARGET_GW
if not snap_mask.any():
    # Fail early with an actionable message instead of letting the model
    # reject an empty feature matrix further down.
    raise ValueError(
        f"No rows for TARGET_GW={TARGET_GW} in model_df; check that "
        "player_history.csv covers that gameweek and that its rows pass the "
        "roll_points_5wk > 0 filter."
    )

# Same feature construction as for training: numeric columns with NaN -> 0,
# plus one-hot categoricals aligned to the training dummy columns.
X_num_snap = model_df.loc[snap_mask, numeric_features].fillna(0.0)
X_cat_snap = pd.get_dummies(model_df.loc[snap_mask, cat_features], drop_first=False)
X_cat_snap = X_cat_snap.reindex(columns=X_train.columns[len(numeric_features):], fill_value=0)
X_snap = pd.concat([X_num_snap, X_cat_snap], axis=1)

snap_core_cols = ["player_id", "player_name", "team_short_name", "position_label", "gameweek"]
snap_core_cols = [c for c in snap_core_cols if c in model_df.columns]
snap_core = model_df.loc[snap_mask, snap_core_cols].copy()

snap_core["pred_rf_full"] = rf_full.predict(X_snap)
snap_core["form_custom"] = model_df.loc[snap_mask, "form_custom"].values

# Pull the extra context columns from master, one row per (player, gameweek).
merge_cols = ["player_id", "gameweek"]
extra_cols = ["minutes", "now_cost_m", "fixture_difficulty", "is_home_int", "team_fpl_id", "opponent_fpl_id", "roll_minutes_3wk"]
extra_cols = [c for c in extra_cols if c in master.columns]

snapshot_df = snap_core.merge(
    master[merge_cols + extra_cols].drop_duplicates(merge_cols),
    on=["player_id", "gameweek"],
    how="left"
)

# Add the per-player consistency summary.
snapshot_df = snapshot_df.merge(
    player_cons_all[["player_id", "points_mean", "consistency_score"]],
    on="player_id",
    how="left"
)

print(f"Snapshot for TARGET_GW={TARGET_GW}:")
display(snapshot_df.head(20))


## K. Position Tiers

In [None]:
def add_position_tiers(df, score_col="pred_rf_full"):
    """Return a copy of ``df`` with a ``tier`` column ranked within each position.

    Players are percentile-ranked on ``score_col`` inside their
    ``position_label`` group and bucketed:

        pct >= 0.85 -> "S", pct >= 0.70 -> "A", pct >= 0.50 -> "B", else "C".

    Rows with a missing score get "C"; rows with a missing ``position_label``
    get NaN because they belong to no group.

    Args:
        df: Frame with ``position_label`` and ``score_col`` columns; it is
            not modified.
        score_col: Column to rank on.

    Returns:
        A new DataFrame with the ``tier`` column added.

    Raises:
        ValueError: If ``score_col`` is not a column of ``df``.
    """
    df = df.copy()
    if score_col not in df.columns:
        raise ValueError(f"{score_col} not in df.")
    # A grouped rank always returns one value per input row. groupby.apply
    # with a Series-returning function instead collapses to a wide DataFrame
    # when only one position group is present, which breaks the assignment.
    pct = df.groupby("position_label")[score_col].rank(pct=True)
    tiers = np.select(
        [
            pct >= 0.85,
            pct >= 0.70,
            pct >= 0.50,
        ],
        ["S", "A", "B"],
        default="C",
    )
    tier_series = pd.Series(tiers, index=df.index, dtype=object)
    # Rows without a position are not part of any group: leave their tier empty.
    df["tier"] = tier_series.where(df["position_label"].notna())
    return df

# Joined frame kept available for ad-hoc inspection, e.g. print(test.columns).
test = snapshot_df.merge(players_meta, left_on="player_id", right_on=player_id_key, how="inner")

# Look names up by player id. Copying a column from the inner-merge result
# relies on positional index alignment, which puts names on the wrong players
# as soon as the merge drops or duplicates a row.
name_lookup = (
    players_meta.dropna(subset=[player_id_key])
    .drop_duplicates(subset=[player_id_key])
    .set_index(player_id_key)["web_name"]
)
snapshot_df["player_name"] = snapshot_df["player_id"].map(name_lookup)

snapshot_df = add_position_tiers(snapshot_df, score_col="pred_rf_full")
display(snapshot_df[[
    "player_id", "player_name", "team_short_name", "position_label",
    "pred_rf_full", "form_custom", "points_mean", "consistency_score", "tier"
]].head(20))


## L. Tuned Strategy Hyperparameters & Selection Score

In [None]:
# Default thresholds of apply_consistency_filter: minimum points_mean and
# minimum consistency_score a player needs to be kept.
CONS_MIN_POINTS   = 2.5
CONS_MIN_SCORE    = 0.3
# Weight of the (clipped) consistency_score bonus in build_selection_score.
CONS_BOOST_WEIGHT = 0.09

# Weight of now_cost_m in build_selection_score.
COST_WEIGHT   = 0.03
# NOTE(review): the two blend weights are not referenced by the code in this
# notebook section - confirm whether they are still needed.
BLEND_W_FULL  = 0.7
BLEND_W_STRICT = 0.3

# Per-position multiplier applied to the selection score; positions missing
# from this table are weighted 1.0 by build_selection_score.
POS_WEIGHTS = {
    "GK": 1.00,
    "DEF": 0.95,
    "MID": 1.05,
    "FWD": 1.08,
}

def apply_consistency_filter(df,
                             min_points_mean=CONS_MIN_POINTS,
                             min_consistency=CONS_MIN_SCORE):
    """Keep only players meeting both consistency thresholds.

    A row survives when ``points_mean >= min_points_mean`` and
    ``consistency_score >= min_consistency``. If either column is missing the
    frame is returned unfiltered. The input is never modified; a copy is
    always returned.
    """
    frame = df.copy()
    needed = {"points_mean", "consistency_score"}
    if not needed.issubset(frame.columns):
        return frame
    enough_points = frame["points_mean"].ge(min_points_mean)
    steady_enough = frame["consistency_score"].ge(min_consistency)
    return frame.loc[enough_points & steady_enough].copy()

def build_selection_score(df):
    """Combine the model prediction with form, cost, position and consistency.

    Starting from ``pred_rf_full`` the score is adjusted, in this order and
    only for the columns that exist:

    1. ``+ 0.05 * form_custom``
    2. defenders only: ``+ 0.10 * def_form_custom``, or when that column is
       absent ``+ 0.05 * def_attacking_index``
    3. ``+ COST_WEIGHT * now_cost_m``
    4. multiplied by the position weight from ``POS_WEIGHTS`` (1.0 if unknown)
    5. ``+ CONS_BOOST_WEIGHT * consistency_score`` clipped to [0, 3]

    Missing values in any adjustment column count as 0. Returns the score as
    a Series aligned with ``df``; raises ValueError when ``pred_rf_full`` is
    missing.
    """
    frame = df.copy()
    available = frame.columns
    if "pred_rf_full" not in available:
        raise ValueError("pred_rf_full not in df; run prediction pipeline first.")

    score = frame["pred_rf_full"].astype(float)

    if "form_custom" in available:
        score += 0.05 * frame["form_custom"].fillna(0.0)

    # Defender-only bonus: the first of the two columns that exists wins.
    if "def_form_custom" in available:
        def_bonus = ("def_form_custom", 0.10)
    elif "def_attacking_index" in available:
        def_bonus = ("def_attacking_index", 0.05)
    else:
        def_bonus = None
    if def_bonus is not None:
        bonus_col, bonus_weight = def_bonus
        is_defender = frame["position_label"] == "DEF"
        score += np.where(is_defender, bonus_weight * frame[bonus_col].fillna(0.0), 0.0)

    if "now_cost_m" in available:
        score += COST_WEIGHT * frame["now_cost_m"].fillna(0.0)

    if "position_label" in available and "POS_WEIGHTS" in globals():
        position_weight = frame["position_label"].map(POS_WEIGHTS).fillna(1.0)
        score = score * position_weight

    if "consistency_score" in available:
        bounded_consistency = frame["consistency_score"].fillna(0.0).clip(lower=0, upper=3)
        score += CONS_BOOST_WEIGHT * bounded_consistency

    return score

# Selection score used by the XI optimiser and the transfer suggestions.
snapshot_df["score_for_xi"] = build_selection_score(snapshot_df)

score_view_cols = [
    "player_id", "player_name", "team_short_name", "position_label",
    "pred_rf_full", "form_custom", "now_cost_m",
    "points_mean", "consistency_score", "tier", "score_for_xi",
]
display(snapshot_df[score_view_cols].head(20))


## M. Best XI Selector (ILP)

In [None]:
def select_best_xi_ilp(
    df,
    budget_m=100.0,
    max_from_team=3,
    use_active_filter=True,
    use_consistency_filter=True,
    score_col="score_for_xi",
):
    """Pick the highest-scoring XI with an integer linear program.

    Constraints: exactly 11 players; 1 GK, 3-5 DEF, 2-5 MID, 1-3 FWD; total
    ``now_cost_m`` at most ``budget_m``; at most ``max_from_team`` players
    per ``team_short_name``.

    Args:
        df: Candidate players, one row each; it is not modified.
        budget_m: Maximum total cost of the XI.
        max_from_team: Maximum number of players from one team.
        use_active_filter: Drop players with ``roll_minutes_3wk <= 0`` (when
            that column exists).
        use_consistency_filter: Apply ``apply_consistency_filter`` first.
        score_col: Column to maximise; built with ``build_selection_score``
            when missing.

    Returns:
        The selected rows, sorted by position then score, with an extra
        ``selection_score`` column. Totals are printed as a side effect.

    Raises:
        ValueError: If no candidates survive the filters, ``now_cost_m`` is
            missing, a position has fewer candidates than its minimum, or the
            solver does not reach an optimal solution.
    """
    df = df.copy()
    if use_active_filter and "roll_minutes_3wk" in df.columns:
        df = df[df["roll_minutes_3wk"] > 0].copy()
    if use_consistency_filter:
        df = apply_consistency_filter(df)
    if df.empty:
        raise ValueError("No players left after filters in select_best_xi_ilp.")
    if score_col not in df.columns:
        df[score_col] = build_selection_score(df)
    if "now_cost_m" not in df.columns:
        raise ValueError("now_cost_m not in df; needed for budget constraint.")
    df = df[df[score_col].notna()].copy()
    if df.empty:
        raise ValueError("No players with valid scores in select_best_xi_ilp.")
    # A player without a price cannot take part in the budget constraint.
    df = df[df["now_cost_m"].notna()].copy()
    if df.empty:
        raise ValueError("No players with a known now_cost_m in select_best_xi_ilp.")

    pos_bounds = {
        "GK": (1, 1),
        "DEF": (3, 5),
        "MID": (2, 5),
        "FWD": (1, 3),
    }
    # Fail loudly when a formation minimum cannot be met, instead of silently
    # dropping that position's constraint.
    pos_counts = df["position_label"].value_counts()
    for pos, (min_c, _) in pos_bounds.items():
        have = int(pos_counts.get(pos, 0))
        if have < min_c:
            raise ValueError(
                f"Only {have} {pos} candidate(s) left after filters; at least {min_c} required."
            )

    prob = pulp.LpProblem("Best_XI", pulp.LpMaximize)
    indices = list(df.index)
    x = {i: pulp.LpVariable(f"x_{i}", cat="Binary") for i in indices}
    scores = df[score_col].astype(float).to_dict()
    costs = df["now_cost_m"].astype(float).to_dict()

    prob += pulp.lpSum(scores[i] * x[i] for i in indices)
    prob += pulp.lpSum(x[i] for i in indices) == 11
    for pos, (min_c, max_c) in pos_bounds.items():
        idx_pos = df.index[df["position_label"] == pos].tolist()
        prob += pulp.lpSum(x[i] for i in idx_pos) >= min_c
        prob += pulp.lpSum(x[i] for i in idx_pos) <= max_c
    prob += pulp.lpSum(costs[i] * x[i] for i in indices) <= budget_m
    for _, grp in df.groupby("team_short_name"):
        prob += pulp.lpSum(x[i] for i in grp.index.tolist()) <= max_from_team

    prob.solve(pulp.PULP_CBC_CMD(msg=False))
    if prob.status != pulp.LpStatusOptimal:
        # Variable values are meaningless unless the solve was optimal.
        raise ValueError(
            "Best XI optimisation did not reach an optimal solution "
            f"(status: {pulp.LpStatus[prob.status]})."
        )
    # Binary values come back as floats; threshold instead of comparing to 1.
    selected_idx = [i for i in indices if (pulp.value(x[i]) or 0.0) > 0.5]

    xi = df.loc[selected_idx].copy()
    xi = xi.sort_values(["position_label", score_col], ascending=[True, False])
    xi["selection_score"] = xi[score_col]
    total_cost = xi["now_cost_m"].sum()
    total_score = xi[score_col].sum()
    print(f"Best XI total selection score: {total_score:.2f}")
    print(f"Total cost: {total_cost:.1f}m (budget {budget_m:.1f}m)")
    return xi

# Optimise an XI from the full snapshot under a 100.0m budget, max 3 per team.
best_xi = select_best_xi_ilp(snapshot_df, budget_m=100.0, max_from_team=3)

best_xi_view_cols = [
    "player_id", "player_name", "team_short_name", "position_label",
    "now_cost_m", "pred_rf_full", "selection_score", "tier",
    "points_mean", "consistency_score",
]
display(best_xi[best_xi_view_cols])


## N. Transfer Suggester (Greedy, by player_id)

In [None]:
def suggest_transfers_greedy(
    snapshot_df,
    current_squad_ids,
    n_transfers=1,
    budget_m=100.0,
    max_from_team=3,
    use_active_filter=True,
    use_consistency_filter=True,
    score_col="score_for_xi",
):
    """Rank like-for-like single-player swaps by selection-score gain.

    Every squad player is compared with every same-position player outside
    the squad. A swap is kept when the squad cost after the swap stays within
    ``budget_m``, no team exceeds ``max_from_team`` players, and the incoming
    player has a strictly higher score.

    The activity and consistency filters are applied to the incoming pool
    only. The whole squad is used for the cost and team-count bookkeeping, so
    squad members that would fail the filters are still counted and can still
    be suggested for sale.

    Args:
        snapshot_df: Player snapshot, one row per player; it is not modified.
        current_squad_ids: ``player_id`` values of the current squad.
        n_transfers: Accepted for interface compatibility; only single swaps
            are evaluated.
        budget_m: Maximum total squad cost after a swap.
        max_from_team: Maximum number of squad players from one team.
        use_active_filter: Require ``roll_minutes_3wk > 0`` for incoming
            players (when that column exists).
        use_consistency_filter: Apply ``apply_consistency_filter`` to the
            incoming pool.
        score_col: Score column; built with ``build_selection_score`` when
            missing.

    Returns:
        Up to 50 suggestions sorted by ``delta_score`` (descending), or an
        empty DataFrame when no swap improves the score. The top 20 are
        displayed as a side effect.

    Raises:
        ValueError: If ``now_cost_m`` is missing, no squad player is found in
            the snapshot, or no incoming candidates survive the filters.
    """
    df = snapshot_df.copy()
    if score_col not in df.columns:
        df[score_col] = build_selection_score(df)
    if "now_cost_m" not in df.columns:
        raise ValueError("now_cost_m missing; needed for transfer suggestions.")

    # Split first, filter afterwards: the squad must stay complete.
    squad_mask = df["player_id"].isin(current_squad_ids)
    squad = df[squad_mask].copy()
    pool = df[~squad_mask].copy()
    if squad.empty:
        raise ValueError("No overlapping players between snapshot_df and current_squad_ids.")

    if use_active_filter and "roll_minutes_3wk" in pool.columns:
        pool = pool[pool["roll_minutes_3wk"] > 0].copy()
    if use_consistency_filter:
        pool = apply_consistency_filter(pool)
    # Incoming players need both a score and a price to be comparable.
    pool = pool[pool[score_col].notna() & pool["now_cost_m"].notna()]
    if pool.empty:
        raise ValueError("No players left after filters in suggest_transfers_greedy.")

    current_cost = squad["now_cost_m"].sum()
    team_counts = squad["team_short_name"].value_counts().to_dict()
    # A squad player without a score cannot be compared, so is never sold.
    sellable = squad[squad[score_col].notna()]
    # Group the pool once instead of re-filtering it for every squad player.
    pool_by_pos = {pos: grp for pos, grp in pool.groupby("position_label")}

    suggestions = []
    for _, row_out in sellable.iterrows():
        pid_out = row_out["player_id"]
        cost_out = row_out["now_cost_m"]
        score_out = row_out[score_col]
        team_out = row_out["team_short_name"]
        pos_out = row_out["position_label"]
        pool_pos = pool_by_pos.get(pos_out)
        if pool_pos is None:
            continue
        for _, row_in in pool_pos.iterrows():
            pid_in = row_in["player_id"]
            cost_in = row_in["now_cost_m"]
            score_in = row_in[score_col]
            team_in = row_in["team_short_name"]
            new_cost = current_cost - cost_out + cost_in
            if new_cost > budget_m:
                continue
            counts = team_counts.copy()
            counts[team_out] = counts.get(team_out, 0) - 1
            counts[team_in] = counts.get(team_in, 0) + 1
            if any(v > max_from_team for v in counts.values()):
                continue
            gain = score_in - score_out
            if gain <= 0:
                continue
            suggestions.append({
                "out_player_id": pid_out,
                "out_name": row_out["player_name"],
                "out_team": team_out,
                "out_pos": pos_out,
                "out_cost": cost_out,
                "out_score": score_out,
                "in_player_id": pid_in,
                "in_name": row_in["player_name"],
                "in_team": team_in,
                "in_pos": pos_out,
                "in_cost": cost_in,
                "in_score": score_in,
                "delta_cost": cost_in - cost_out,
                "delta_score": gain,
            })
    if not suggestions:
        print("No positive-gain single-player swaps found under constraints.")
        return pd.DataFrame()
    sugg_df = pd.DataFrame(suggestions).sort_values("delta_score", ascending=False)
    print(f"Top {min(20, len(sugg_df))} single-transfer suggestions (selection_score gain):")
    display(sugg_df.head(20))
    return sugg_df.head(50)

# Example:
# current_squad_ids = [...]
# transfer_suggestions = suggest_transfers_greedy(snapshot_df, current_squad_ids, n_transfers=1, budget_m=100.0)


## O. Starting XI Selector for a Given Squad

In [None]:
def select_starting_xi_for_squad(
    snapshot_df,
    current_squad_ids,
    max_from_team=3,
    use_active_filter=True,
    use_consistency_filter=True,
    score_col="score_for_xi",
):
    """Choose the best starting XI and bench order from an existing squad.

    The XI maximises ``score_col`` with exactly 11 players in a valid
    formation (1 GK, 3-5 DEF, 2-5 MID, 1-3 FWD) and at most
    ``max_from_team`` players per ``team_short_name``. Only squad players
    that pass the optional filters and have a score can start. The bench is
    every other squad member, including players the filters excluded from
    starting, ordered by score.

    Args:
        snapshot_df: Player snapshot, one row per player; it is not modified.
        current_squad_ids: ``player_id`` values of the squad.
        max_from_team: Maximum number of starters from one team.
        use_active_filter: Require ``roll_minutes_3wk > 0`` to start (when
            that column exists).
        use_consistency_filter: Require passing ``apply_consistency_filter``
            to start.
        score_col: Score column; built with ``build_selection_score`` when
            missing.

    Returns:
        ``(xi, bench)`` DataFrames; both are also displayed.

    Raises:
        ValueError: If no squad player is in the snapshot, fewer than 11
            players are eligible, a position minimum cannot be met, or the
            solver does not reach an optimal solution.
    """
    squad = snapshot_df[snapshot_df["player_id"].isin(current_squad_ids)].copy()
    if squad.empty:
        raise ValueError("No overlapping players between snapshot_df and current_squad_ids.")
    if score_col not in squad.columns:
        squad[score_col] = build_selection_score(squad)

    # Eligible starters: squad members that pass the filters and have a score.
    df = squad
    if use_active_filter and "roll_minutes_3wk" in df.columns:
        df = df[df["roll_minutes_3wk"] > 0].copy()
    if use_consistency_filter:
        df = apply_consistency_filter(df)
    df = df[df[score_col].notna()].copy()
    if df.empty:
        raise ValueError("No players left in squad after filters.")
    if len(df) < 11:
        raise ValueError(
            f"Only {len(df)} squad player(s) are eligible after filters; 11 are needed for a starting XI."
        )

    pos_bounds = {
        "GK": (1, 1),
        "DEF": (3, 5),
        "MID": (2, 5),
        "FWD": (1, 3),
    }
    # Fail loudly when a formation minimum cannot be met, instead of silently
    # dropping that position's constraint.
    pos_counts = df["position_label"].value_counts()
    for pos, (min_c, _) in pos_bounds.items():
        have = int(pos_counts.get(pos, 0))
        if have < min_c:
            raise ValueError(
                f"Only {have} eligible {pos} player(s) in the squad; at least {min_c} required."
            )

    prob = pulp.LpProblem("Starting_XI", pulp.LpMaximize)
    indices = list(df.index)
    x = {i: pulp.LpVariable(f"x_{i}", cat="Binary") for i in indices}
    scores = df[score_col].astype(float).to_dict()
    prob += pulp.lpSum(scores[i] * x[i] for i in indices)
    prob += pulp.lpSum(x[i] for i in indices) == 11
    for pos, (min_c, max_c) in pos_bounds.items():
        idx_pos = df.index[df["position_label"] == pos].tolist()
        prob += pulp.lpSum(x[i] for i in idx_pos) >= min_c
        prob += pulp.lpSum(x[i] for i in idx_pos) <= max_c
    for _, grp in df.groupby("team_short_name"):
        prob += pulp.lpSum(x[i] for i in grp.index.tolist()) <= max_from_team

    prob.solve(pulp.PULP_CBC_CMD(msg=False))
    if prob.status != pulp.LpStatusOptimal:
        # Variable values are meaningless unless the solve was optimal.
        raise ValueError(
            "Starting XI optimisation did not reach an optimal solution "
            f"(status: {pulp.LpStatus[prob.status]})."
        )
    # Binary values come back as floats; threshold instead of comparing to 1.
    selected_idx = [i for i in indices if (pulp.value(x[i]) or 0.0) > 0.5]

    xi = df.loc[selected_idx].copy()
    xi = xi.sort_values(["position_label", score_col], ascending=[True, False])
    xi["selection_score"] = xi[score_col]
    print("Starting XI (within your squad):")
    display(xi[[
        "player_id", "player_name", "team_short_name", "position_label",
        "pred_rf_full", "selection_score", "tier",
        "points_mean", "consistency_score"
    ]])
    # Bench = the whole rest of the squad, not only the filtered candidates.
    bench = squad.loc[~squad.index.isin(selected_idx)].copy()
    bench = bench.sort_values(score_col, ascending=False)
    print("Bench ordered by model score:")
    display(bench[[
        "player_id", "player_name", "team_short_name", "position_label",
        "pred_rf_full", score_col, "tier",
        "points_mean", "consistency_score"
    ]])
    return xi, bench

# Example:
# current_squad_ids = [...]
# starting_xi, bench = select_starting_xi_for_squad(snapshot_df, current_squad_ids)
