In [93]:
import pandas as pd
from constants import GW_HISTORY_FILE
from utils import *
from scipy.stats import poisson

In [102]:
def expected(pmf, value, max_value):
    xp = 0
    for x in range(1, max_value + 1):
        xp += value * x * pmf(x)
    return xp

def predict_points(position, apxG, apxA, amins, abonus, atxG, atxGA, aoxG, aoxGA):
    params = from_json("../data/params.json")
    alpha = params[position]["alpha"]
    beta = params[position]["beta"]
    goal_value = params[position]["goal_value"]
    assist_value = params[position]["assist_value"]
    clean_sheet_value = params[position]["clean_sheet_value"]
    tgcv = params[position]["two_goals_conceded_value"]
    xp = 0

    def goal_pmf(x): return alpha * poisson(apxG).pmf(x) + \
        (1 - alpha) * poisson(aoxGA).pmf(x)
    def assist_pmf(x): return alpha * poisson(apxA).pmf(x) + \
        (1 - alpha) * poisson(aoxGA).pmf(x)

    def bonus_pmf(x): return poisson(abonus).pmf(x)
    def cs_pmf(x): return beta * poisson(atxGA).pmf(0) + \
        (1 - beta) * poisson(aoxG).pmf(0)

    def concede_pmf(x): return poisson(atxGA).pmf(2 * x)
    # print("clean sheet probability with beta = {}: {}".format(beta, cs_pmf(0)))
    xp += expected(goal_pmf, goal_value, 4)
    xp += expected(assist_pmf, assist_value, 4)
    xp += expected(bonus_pmf, 1, 3)
    xp += expected(cs_pmf, clean_sheet_value, 1)
    xp += expected(concede_pmf, tgcv, 4)
    xp += int(amins > 0)
    xp += int(amins > 59)
    return xp

def predict(row):
    return predict_points(
        row["position"],
        row["anpxG"],
        row["axA"],
        row["aminutes_played"],
        row["abonus"],
        row["avg_team_xG"],
        row["avg_team_xGA"],
        row["avg_opponent_xG"],
        row["avg_opponent_xGA"]
    )

In [164]:
df = pd.read_csv(GW_HISTORY_FILE)
def get_ema(df, player, col, alpha):
    ema = df[df["player_name"] == player][col].ewm(alpha=alpha, ignore_na=True).mean()
    ema = ema.to_numpy()
    ema = np.roll(ema, 1)
    ema[0] = None
    ema = np.round(ema, 3)
    return ema
for player in df["player_name"].unique():
    df.loc[df["player_name"] == player, "anpxG"] = get_ema(df, player, "npxG", 0.3)
    df.loc[df["player_name"] == player, "axA"] = get_ema(df, player, "xA", 0.3)


# rows = df.loc[df["player_name"] == "Bukayo Saka"].copy()
rows = df[df["gameweek"] == 30].copy()
rows["xP"] = rows.apply(predict, axis=1)

In [166]:
# df2.head(20)
rows = rows.groupby(by=["player_name", "gameweek"]).sum().reset_index()
df2 = rows[["player_name", "gameweek", "total_points", "xP"]].sort_values(by="xP", ascending=False)
df2.head(20)

Unnamed: 0,player_name,gameweek,total_points,xP
71,Harry Kane,30.0,13.0,7.243427
163,Raphael Dias Belloli,30.0,0.0,6.173178
81,Ivan Toney,30.0,2.0,6.038234
159,Philippe Coutinho Correia,30.0,2.0,5.35393
138,Matt Doherty,30.0,2.0,5.247918
154,Pascal Struijk,30.0,0.0,5.187135
139,Matthew Cash,30.0,2.0,5.046278
47,Dejan Kulusevski,30.0,2.0,4.694686
75,Heung-Min Son,30.0,15.0,4.660696
73,Harvey Barnes,30.0,5.0,4.544942
