In [114]:
import pandas as pd
import os
import requests
import numpy as np
import sqlite3
pd.options.display.max_columns = None

In [9]:
def get_fpl_data():
    """Retrieve FPL data and process it."""
    try:
        r = requests.get("https://fantasy.premierleague.com/api/bootstrap-static/")
        fpl_data = r.json()

        elements = pd.DataFrame(fpl_data["elements"])
        elements = elements[
            ["id", "team", "element_type", "now_cost", "selected_by_percent"]
        ]
        elements["position"] = elements["element_type"].replace(
            {1: "GKP", 2: "DEF", 3: "MID", 4: "FWD"}
        )
        elements = elements.rename(columns={"selected_by_percent": "tsb"})
        elements["now_cost"] = np.round(elements["now_cost"] / 10, 1)

        teams = pd.DataFrame(fpl_data["teams"])
        teams = teams[["id", "name", "short_name"]]

        fpl_elements = elements.merge(teams, left_on="team", right_on="id", how="inner")
        fpl_elements = fpl_elements.rename(
            columns={"id_x": "fpl_id", "id_y": "team_id", "name": "team_name"}
        )
        fpl_elements = fpl_elements[
            [
                "fpl_id",
                "team_id",
                "team_name",
                "short_name",
                "element_type",
                "now_cost",
                "tsb",
                "position",
            ]
        ]
        return fpl_elements
    except Exception as e:
        print(f"Error retrieving FPL data: {e}")
        return pd.DataFrame()

In [102]:
#Get next GW
BASE_URL = "https://fantasy.premierleague.com/api"
with requests.Session() as session:
    static_url = f"{BASE_URL}/bootstrap-static/"
    static = session.get(static_url).json()
    next_gw = [x for x in static["events"] if x["is_next"]][0]["id"]

In [10]:
# Get FPL data
fpl_elements = get_fpl_data()
if fpl_elements.empty:
    print("FPL data retrieval failed. Exiting.")

In [149]:
# Connect to the SQLite database
conn = sqlite3.connect("C:/Users/erknud3/fpl-optimization/model/FBRef_DB/master.db")

print("Loading data from the database...")

# Load data from Match table
player_baselines = pd.read_sql_query(
    """
    select *
    from player_baselines
    """,
    conn,
)

skill = pd.read_sql_query(
    """
    select *
    from skill_and_cards
    """,
    conn,
)

attack_multipliers = pd.read_sql_query(
    """
    select *
    from attack_multipliers
    """,
    conn,
)

npxG_pred = pd.read_sql_query(
    """
    select *
    from npxG_pred
    """,
    conn,
)

gc_probs = pd.read_sql_query(
    """
    select *
    from gc_probabilities
    """,
    conn,
)

# Close the connection
conn.close()

pen_share = pd.read_csv("C:/Users/erknud3/fpl-optimization/model/data/Prediction_Data/pen_share.csv")

Loading data from the database...


In [153]:
player_baselines = player_baselines.merge(fpl_elements, on="fpl_id", how="left")

In [154]:
# Define position multipliers
position_multipliers = {
    1: 10,
    2: 6,
    3: 5,
    4: 4,
}  # GKP, DEF, MID, FWD multipliers

In [182]:
# Generate player_xp_goals
columns_to_keep = [
    "fpl_id",
    "web_name",
    "team_name",
    "team_id",
    "element_type",
    "position",
    "now_cost",
    "games",
    "minutes",
    "90s",
    "npxG",
    "xA",
    "npxG_baseline",
    "xA_baseline",
]


player_xp_goals = player_baselines[columns_to_keep].copy()

for gw in range(next_gw, next_gw + 12):
    gw_column = str(gw)
    player_xp_goals[gw_column] = player_xp_goals.apply(
        lambda row: row["npxG_baseline"]
        * skill.loc[
            skill["fpl_id"] == row["fpl_id"], "finishing_skill"
        ].values[0]
        * attack_multipliers.loc[
            attack_multipliers["team_id"] == row["team_id"], gw_column
        ].values[0]
        * position_multipliers[row["element_type"]],
        axis=1,
    )

player_xp_goals = player_xp_goals.round(2)
player_xp_goals = player_xp_goals.drop(["team_id", "element_type"], axis=1)

In [183]:
# Generate player_xp_pens
player_xp_pens = player_baselines[columns_to_keep].copy()

# Merge player_xp_pens with pen_share to get the penalty share for each player
player_xp_pens = player_xp_pens.merge(
    pen_share[["fpl_id", "pen_share"]], on="fpl_id", how="left"
)

# Fill missing pen_share values with 0 (players who don't take penalties)
player_xp_pens["pen_share"] = player_xp_pens["pen_share"].fillna(0)

for gw in range(next_gw, next_gw + 12):
    gw_column = str(gw)
    player_xp_pens[gw_column] = player_xp_pens.apply(
        lambda row: (
            (
                0.1
                * npxG_pred.loc[
                    npxG_pred["team_id"] == row["team_id"], gw_column
                ].values[0]
                * skill.loc[
                    skill["fpl_id"] == row["fpl_id"], "pen_skill"
                ].values[0]
                * 0.77
                * position_multipliers[row["element_type"]]
                * row["pen_share"]
            )
            if row["pen_share"] > 0
            else 0
        ),  # Ensure EV is 0 if pen_share is 0
        axis=1,
    )

player_xp_pens = player_xp_pens.round(2)
player_xp_pens = player_xp_pens.drop(["team_id", "element_type", "pen_share"], axis=1)

In [184]:
player_xp_assists = player_baselines[columns_to_keep].copy()

for gw in range(next_gw, next_gw + 12):
    gw_column = str(gw)
    player_xp_assists[gw_column] = player_xp_assists.apply(
        lambda row: row["xA_baseline"]
        * skill.loc[
            skill["fpl_id"] == row["fpl_id"], "assist_skill"
        ].values[0]
        * attack_multipliers.loc[
            attack_multipliers["team_id"] == row["team_id"], gw_column
        ].values[0]
        * position_multipliers[row["element_type"]],
        axis=1,
    )

player_xp_assists = player_xp_assists.round(2)
player_xp_assists = player_xp_assists.drop(["team_id", "element_type"], axis=1)

In [185]:
player_xp_cs = player_baselines[columns_to_keep].copy()

points_per_goal_scenario = {
    0: {1: 4, 2: 4, 3: 1, 4: 0},
    1: 0,
    2: {1: -1, 2: -1, 3: 0, 4: 0},
    4: {1: -2, 2: -2, 3: 0, 4: 0},
    6: {1: -3, 2: -3, 3: 0, 4: 0},
    8: {1: -4, 2: -4, 3: 0, 4: 0},
}

for gw in range(next_gw, next_gw + 12):
    cs_column = f"{gw}_0_goals"
    gc_1_column = f"{gw}_1_goals"
    gc_2_column = f"{gw}_2_goals"
    gc_4_column = f"{gw}_4_goals"
    gc_6_column = f"{gw}_6_goals"
    gc_8_column = f"{gw}_8_goals"

    player_xp_cs[str(gw)] = player_xp_cs.apply(
        lambda row: (
            gc_probs.loc[
                gc_probs["team_id"] == row["team_id"], cs_column
            ].values[0]
            * points_per_goal_scenario[0][row["element_type"]]
            # * player_xmins.loc[
            #     player_xmins["fpl_id"] == row["fpl_id"], "P(60_min)"
            # ].values[0]
            + gc_probs.loc[
                gc_probs["team_id"] == row["team_id"], gc_1_column
            ].values[0]
            * points_per_goal_scenario[1]
            + gc_probs.loc[
                gc_probs["team_id"] == row["team_id"], gc_2_column
            ].values[0]
            * points_per_goal_scenario[2][row["element_type"]]
            + gc_probs.loc[
                gc_probs["team_id"] == row["team_id"], gc_4_column
            ].values[0]
            * points_per_goal_scenario[4][row["element_type"]]
            + gc_probs.loc[
                gc_probs["team_id"] == row["team_id"], gc_6_column
            ].values[0]
            * points_per_goal_scenario[6][row["element_type"]]
            + gc_probs.loc[
                gc_probs["team_id"] == row["team_id"], gc_8_column
            ].values[0]
            * points_per_goal_scenario[8][row["element_type"]]
        ),
        axis=1,
    )

player_xp_cs = player_xp_cs.round(2)
player_xp_cs = player_xp_cs.drop(["team_id", "element_type"], axis=1)

In [193]:
common_columns = [
    "fpl_id",
    "web_name",
    "team_name",
    "position",
    "now_cost",
    "games",
    "minutes",
    "90s",
    "npxG",
    "xA",
    "npxG_baseline",
    "xA_baseline"
]

def rename_gameweek_columns(df, prefix):
    df_prefixed = df.copy()
    # Rename only the gameweek columns with the provided prefix
    gameweek_columns = [col for col in df.columns if col not in common_columns]
    df_prefixed.rename(
        columns={col: f"{prefix}_{col}" for col in gameweek_columns},
        inplace=True,
    )
    return df_prefixed

In [194]:
player_xp_goals_prefixed = rename_gameweek_columns(player_xp_goals, "goals")
player_xp_pens_prefixed = rename_gameweek_columns(player_xp_pens, "pens")
player_xp_assists_prefixed = rename_gameweek_columns(player_xp_assists, "assists")
player_xp_cs_prefixed = rename_gameweek_columns(player_xp_cs, "cs")
#player_xp_app_prefixed = rename_gameweek_columns(player_xp_app, "app")

In [195]:
# Step 3: Merge the dataframes on the common columns
merged_df = player_xp_goals_prefixed.merge(
    player_xp_pens_prefixed, on=common_columns, how="outer"
)
merged_df = merged_df.merge(
    player_xp_assists_prefixed, on=common_columns, how="outer"
)
merged_df = merged_df.merge(
    player_xp_cs_prefixed, on=common_columns, how="outer"
)

In [196]:
# Step 4: Add total columns (sum of goals, pens, assists, cs for each gameweek)
for gw in range(next_gw, num_gameweeks + 1):
    gw_cols = [
        f"goals_{gw}",
        f"pens_{gw}",
        f"assists_{gw}",
        f"cs_{gw}",
        # f"app_{gw}",
    ]
    # Check if these columns exist in the dataframe (in case some columns are missing for certain gameweeks)
    if all(col in merged_df.columns for col in gw_cols):
        merged_df[f"total_{gw}"] = merged_df[gw_cols].sum(axis=1)

# Step 5: Reorder the columns so common columns come first and the gameweek columns follow in a specific order
# Extract all the columns from merged_df
all_columns = merged_df.columns.tolist()

# Separate common columns and gameweek columns
gw_columns = [col for col in all_columns if col not in common_columns]

# Desired order within each gameweek
metric_order = [
    "goals", 
    "pens", 
    "assists", 
    "cs", 
    # "app", 
    "total"
    ]

# Function to extract the numerical part of the column names
def extract_number(col):
    try:
        return int(col.split("_")[1])
    except (IndexError, ValueError):
        return float("inf")

# Sort the gameweek columns first by gameweek number, then by the desired metric order
ordered_gw_columns = sorted(
    gw_columns,
    key=lambda x: (extract_number(x), metric_order.index(x.split("_")[0])),
)

# Order: common_columns first, then the ordered gameweek columns
ordered_columns = common_columns + ordered_gw_columns

# Reorder the merged dataframe
merged_df = merged_df[ordered_columns]

In [204]:
totals_columns = [f"total_{gw}" for gw in range(next_gw, next_gw + 12)]

new_ordered_columns = common_columns + totals_columns

merged_df_totals = merged_df[new_ordered_columns]

# Rename columns by removing the 'total_' prefix
merged_df_totals.columns = [
    col.replace("total_", "") for col in merged_df_totals.columns
]

In [205]:
merged_df_totals.sort_values(by="5", ascending=False).head(10)

Unnamed: 0,fpl_id,web_name,team_name,position,now_cost,games,minutes,90s,npxG,xA,npxG_baseline,xA_baseline,5,6,7,8,9,10,11,12,13,14,15,16
209,328,M.Salah,Liverpool,MID,12.7,4,352.0,3.91,0.49,0.38,0.53,0.29,6.49,5.4,4.62,5.71,2.93,5.58,6.1,6.6,3.69,4.84,5.09,6.53
317,503,Son,Spurs,MID,10.0,4,359.0,3.99,0.3,0.15,0.33,0.25,4.83,3.92,3.3,5.39,3.5,4.62,5.17,2.18,4.98,3.84,4.37,5.04
107,182,Palmer,Chelsea,MID,10.6,4,352.0,3.91,0.23,0.43,0.36,0.37,4.69,4.71,5.1,3.24,5.18,4.37,3.15,5.06,5.13,5.59,3.95,5.33
202,317,Diogo J.,Liverpool,MID,7.6,4,283.0,3.14,0.57,0.22,0.52,0.19,4.59,3.83,3.29,4.04,2.08,3.96,4.3,4.68,2.61,3.4,3.61,4.63
198,311,Alexander-Arnold,Liverpool,DEF,7.0,4,312.0,3.47,0.06,0.43,0.07,0.32,4.07,3.53,3.06,3.47,1.7,3.52,3.71,4.39,2.18,2.58,3.18,4.24
208,327,Luis Díaz,Liverpool,MID,7.8,4,286.0,3.18,0.6,0.31,0.39,0.2,3.92,3.28,2.81,3.44,1.78,3.38,3.67,4.01,2.23,2.89,3.08,3.96
201,316,Darwin,Liverpool,FWD,7.2,3,65.0,0.72,0.14,0.0,0.59,0.23,3.85,3.17,2.69,3.4,1.72,3.3,3.63,3.88,2.17,2.91,3.01,3.85
76,121,Enciso,Brighton,MID,5.4,3,42.0,0.47,0.21,0.21,0.34,0.27,3.76,2.76,3.7,2.96,4.24,2.32,2.28,3.12,5.18,3.16,3.73,3.66
121,199,Eze,Crystal Palace,MID,6.9,4,360.0,4.0,0.38,0.12,0.27,0.17,3.6,2.79,2.62,2.59,3.26,2.94,3.59,2.59,3.33,3.04,2.02,2.39
322,509,Werner,Spurs,MID,6.3,3,41.0,0.46,0.0,0.0,0.39,0.16,3.47,2.79,2.37,3.86,2.52,3.31,3.81,1.55,3.58,2.75,3.13,3.64


In [206]:
merged_df_totals.loc[:, 'total'] = merged_df_totals.loc[:, str(next_gw):str(next_gw + 11)].sum(axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_df_totals.loc[:, 'total'] = merged_df_totals.loc[:, str(next_gw):str(next_gw + 11)].sum(axis=1)


In [208]:
merged_df_totals.sort_values(by="total", ascending=False).head(20)

Unnamed: 0,fpl_id,web_name,team_name,position,now_cost,games,minutes,90s,npxG,xA,npxG_baseline,xA_baseline,5,6,7,8,9,10,11,12,13,14,15,16,total
209,328,M.Salah,Liverpool,MID,12.7,4,352.0,3.91,0.49,0.38,0.53,0.29,6.49,5.4,4.62,5.71,2.93,5.58,6.1,6.6,3.69,4.84,5.09,6.53,63.58
107,182,Palmer,Chelsea,MID,10.6,4,352.0,3.91,0.23,0.43,0.36,0.37,4.69,4.71,5.1,3.24,5.18,4.37,3.15,5.06,5.13,5.59,3.95,5.33,55.5
226,351,Haaland,Man City,FWD,15.2,4,359.0,3.99,1.03,0.03,0.74,0.14,3.1,4.16,5.52,4.5,7.04,4.3,3.7,5.15,3.29,5.04,3.89,5.65,55.34
220,345,De Bruyne,Man City,MID,9.6,4,356.0,3.96,0.33,0.43,0.23,0.52,3.17,4.04,5.44,4.47,6.88,4.24,3.66,4.98,3.17,5.03,3.92,5.48,54.48
317,503,Son,Spurs,MID,10.0,4,359.0,3.99,0.3,0.15,0.33,0.25,4.83,3.92,3.3,5.39,3.5,4.62,5.17,2.18,4.98,3.84,4.37,5.04,51.14
13,17,Saka,Arsenal,MID,10.0,4,341.0,3.79,0.4,0.37,0.28,0.26,2.03,5.21,5.69,3.54,3.36,3.33,3.1,4.17,3.83,4.51,3.57,4.48,46.82
110,186,Sterling,Arsenal,MID,6.8,1,11.0,0.12,0.0,0.0,0.4,0.22,1.97,5.04,5.49,3.42,3.26,3.22,3.01,4.03,3.7,4.36,3.45,4.34,45.29
202,317,Diogo J.,Liverpool,MID,7.6,4,283.0,3.14,0.57,0.22,0.52,0.19,4.59,3.83,3.29,4.04,2.08,3.96,4.3,4.68,2.61,3.4,3.61,4.63,45.02
223,348,Foden,Man City,MID,9.3,1,45.0,0.5,0.0,0.0,0.32,0.24,2.52,3.16,4.27,3.53,5.4,3.34,2.89,3.91,2.48,3.96,3.1,4.28,42.84
63,99,Mbeumo,Brentford,MID,7.1,4,359.0,3.99,0.28,0.33,0.29,0.21,2.75,4.2,4.07,3.04,4.09,3.06,3.82,3.05,4.56,2.81,3.63,2.65,41.73
