In [5]:
import sqlite3
import pandas as pd
import numpy as np
from scipy.stats import poisson
import requests

In [25]:
# Connect to the SQLite database
conn = sqlite3.connect("C:/Users/erknud3/fpl-optimization/model/FBRef_DB/master.db")

print("Loading data from the database...")

# Load data from Match table
team_baselines = pd.read_sql_query(
    """
    SELECT team_id, team, npxG_baseline, npxGC_baseline
    FROM team_baselines
    """,
    conn,
)

# Close the connection
conn.close()

Loading data from the database...


In [26]:
r = requests.get("https://fantasy.premierleague.com/api/fixtures/")
fixtures = pd.DataFrame(r.json())
fixtures = fixtures[["event", "team_h", "team_a"]]

In [27]:
avg_npxG = team_baselines["npxG_baseline"].mean()
avg_npxGC = team_baselines["npxGC_baseline"].mean()

In [28]:
 # Step 1: Calculate and Round Attack and Defense Multipliers
team_baselines["attack_multiplier"] = np.round((
    team_baselines["npxG_baseline"] / avg_npxG
), 2)
team_baselines["defense_multiplier"] = np.round((
    team_baselines["npxGC_baseline"] / avg_npxGC
), 2)

In [39]:
npxG_pred = team_baselines[['team_id', 'team', 'npxG_baseline', 'attack_multiplier']].copy()
npxGC_pred = team_baselines[['team_id', 'team', 'npxGC_baseline', 'defense_multiplier']].copy()

# Step 3: Initialize New Columns
for gw in range(1, 39):
    npxG_pred.loc[:, gw] = 0.0
    npxGC_pred.loc[:, gw] = 0.0

In [40]:
# Step 4: Process Fixtures and Populate Gameweek Columns
for index, row in fixtures.iterrows():
    gw = row["event"]
    team_h = row["team_h"]
    team_a = row["team_a"]
    
# Get attack and defense multipliers
    home_attack = npxG_pred.loc[
        npxG_pred["team_id"] == team_h, "attack_multiplier"
    ].values[0]
    away_attack = npxG_pred.loc[
        npxG_pred["team_id"] == team_a, "attack_multiplier"
    ].values[0]
    home_defense = npxGC_pred.loc[
        npxGC_pred["team_id"] == team_h, "defense_multiplier"
    ].values[0]
    away_defense = npxGC_pred.loc[
        npxGC_pred["team_id"] == team_a, "defense_multiplier"
    ].values[0]

    # Get baseline npxG and npxGC
    home_npxG = npxG_pred.loc[
        npxG_pred["team_id"] == team_h, "npxG_baseline"
    ].values[0]
    away_npxG = npxG_pred.loc[
        npxG_pred["team_id"] == team_a, "npxG_baseline"
    ].values[0]
    home_npxGC = npxGC_pred.loc[
        npxGC_pred["team_id"] == team_h, "npxGC_baseline"
    ].values[0]
    away_npxGC = npxGC_pred.loc[
        npxGC_pred["team_id"] == team_a, "npxGC_baseline"
    ].values[0]

    # Adjust for home/away and opponent strength, then round to 2 decimals
    npxG_pred.loc[
        npxG_pred["team_id"] == team_h, gw
    ] = round(home_npxG * away_defense * 1.12, 2)
    npxG_pred.loc[
        npxG_pred["team_id"] == team_a, gw
    ] = round(away_npxG * home_defense * 0.88, 2)

    npxGC_pred.loc[
        npxGC_pred["team_id"] == team_h, gw
    ] = round(home_npxGC * away_attack * 0.88, 2)
    npxGC_pred.loc[
        npxGC_pred["team_id"] == team_a, gw
    ] = round(away_npxGC * home_attack * 1.12, 2)

# Step 5: Drop the attack_multiplier and defense_multiplier columns
npxG_pred.drop(columns=["attack_multiplier"], inplace=True)
npxGC_pred.drop(columns=["defense_multiplier"], inplace=True)

In [44]:
attack_multipliers = npxG_pred.copy()

for gw in range(1, 39):
    attack_multipliers[gw] = np.round((
        attack_multipliers[gw] / attack_multipliers["npxG_baseline"]
    ), 2)

In [46]:
# Step 1: Initialize an empty DataFrame to store the probabilities
columns = ["team_id", "Squad"] + [
    f"{gw}_{goals}_goals" for gw in range(1, 39) for goals in range(9)
]
team_concede_probs = pd.DataFrame(columns=columns)

In [48]:
# Step 2: Calculate probabilities for each team and gameweek
for idx, row in npxGC_pred.iterrows():
    team_id = row["team_id"]
    team = row["team"]

    probabilities = []

    for gw in range(1, 39):
        mean_goals_conceded = row[gw]
        probs = poisson.pmf(range(9), mean_goals_conceded).round(4)
        probabilities.extend(probs)

    # Insert the data into the new DataFrame
    team_concede_probs.loc[idx] = [team_id, team] + list(probabilities)

In [51]:
# Initialize a new DataFrame to store the cumulative probabilities
gc_probabilities = pd.DataFrame(
    columns=["team_id", "Squad"]
    + [
        f"{gw}_{concede}_goals"
        for gw in range(1, 39)
        for concede in [0, 1, 2, 4, 6, 8]
    ]
)

In [52]:
# Calculate cumulative probabilities for each team and gameweek
for idx, row in team_concede_probs.iterrows():
    team_id = row["team_id"]
    squad = row["Squad"]

    cum_probs = []

    for gw in range(1, 39):
        prob_0 = row[f"{gw}_0_goals"]
        prob_1 = row[f"{gw}_1_goals"]
        prob_2 = row[f"{gw}_2_goals"]
        prob_4 = row[f"{gw}_4_goals"]
        prob_6 = row[f"{gw}_6_goals"]
        prob_8_or_more = row[f"{gw}_8_goals"] + sum(
            row[f"{gw}_{k}_goals"] for k in range(9, len(row) // 38)
        )

        cum_probs.extend([prob_0, prob_1, prob_2, prob_4, prob_6, prob_8_or_more])

    # Insert the data into the new DataFrame
    gc_probabilities.loc[idx] = [team_id, squad] + cum_probs

Unnamed: 0,team_id,Squad,1_0_goals,1_1_goals,1_2_goals,1_4_goals,1_6_goals,1_8_goals,2_0_goals,2_1_goals,...,37_2_goals,37_4_goals,37_6_goals,37_8_goals,38_0_goals,38_1_goals,38_2_goals,38_4_goals,38_6_goals,38_8_goals
0,1,Arsenal,0.5599,0.3247,0.0942,0.0026,0.0,0.0,0.3606,0.3678,...,0.1606,0.0104,0.0003,0.0,0.5488,0.3293,0.0988,0.003,0.0,0.0
1,2,Aston Villa,0.2441,0.3442,0.2427,0.0402,0.0027,0.0001,0.2187,0.3324,...,0.2466,0.0432,0.003,0.0001,0.1827,0.3106,0.264,0.0636,0.0061,0.0003
2,3,Bournemouth,0.2441,0.3442,0.2427,0.0402,0.0027,0.0001,0.208,0.3266,...,0.2633,0.1212,0.0223,0.0022,0.4232,0.3639,0.1565,0.0096,0.0002,0.0
3,4,Brentford,0.343,0.367,0.1964,0.0187,0.0007,0.0,0.0889,0.2152,...,0.2046,0.0214,0.0009,0.0,0.278,0.3559,0.2278,0.0311,0.0017,0.0
4,5,Brighton,0.2808,0.3567,0.2265,0.0304,0.0016,0.0,0.2982,0.3608,...,0.262,0.0602,0.0055,0.0003,0.1882,0.3144,0.2625,0.061,0.0057,0.0003
5,6,Chelsea,0.192,0.3169,0.2614,0.0593,0.0054,0.0003,0.3135,0.3636,...,0.2252,0.0298,0.0016,0.0,0.2865,0.3581,0.2238,0.0291,0.0015,0.0
6,7,Crystal Palace,0.2165,0.3313,0.2534,0.0494,0.0039,0.0002,0.3499,0.3674,...,0.1687,0.0119,0.0003,0.0,0.1086,0.2411,0.2676,0.1099,0.0181,0.0016
7,8,Everton,0.2491,0.3462,0.2406,0.0387,0.0025,0.0001,0.1423,0.2774,...,0.1523,0.009,0.0002,0.0,0.1367,0.272,0.2707,0.0893,0.0118,0.0008
8,9,Fulham,0.1653,0.2975,0.2678,0.0723,0.0078,0.0005,0.4232,0.3639,...,0.264,0.0636,0.0061,0.0003,0.1572,0.2909,0.2691,0.0767,0.0088,0.0005
9,10,Ipswich Town,0.1481,0.2828,0.2701,0.0821,0.01,0.0007,0.0983,0.228,...,0.1981,0.0193,0.0007,0.0,0.3166,0.3641,0.2094,0.0231,0.001,0.0
