In [5]:
import os
import sqlite3
import sys
from datetime import timedelta

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

sys.path.append("..")

from src.fpl.pipelines.optimization_pipeline.fpl_api import get_fpl_base_data

In [2]:
# Pull every stored row for one player out of the local SQLite database; the
# resulting columns are later compared with the freshly fetched API frame.
saka_rows_sql = """SELECT *
                             FROM raw_fpl_data 
                             where full_name = 'Bukayo Saka'"""

# NOTE(review): the connection is left open in the cells visible here --
# confirm whether later cells still need it before closing it.
connection = sqlite3.connect("../data/fpl.db")
raw_data = pd.read_sql_query(sql=saka_rows_sql, con=connection)

# Preview only the first rows rather than the whole table.
raw_data.head()

Unnamed: 0,season,round,element,full_name,team,position,fixture,opponent_team,opponent_team_name,total_points,...,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,expected_goals,expected_goal_involvements,expected_assists,expected_goals_conceded
0,2018-2019,18.0,563,Bukayo Saka,,,171,4,,0.0,...,0.0,45.0,0.0,0.0,0.0,0.0,,,,
1,2018-2019,19.0,563,Bukayo Saka,,,181,3,,0.0,...,0.0,45.0,52.0,96.0,73.0,21.0,,,,
2,2018-2019,20.0,563,Bukayo Saka,,,196,12,,0.0,...,0.0,45.0,29.0,167.0,65.0,36.0,,,,
3,2018-2019,21.0,563,Bukayo Saka,,,201,9,,1.0,...,0.0,45.0,25.0,234.0,74.0,49.0,,,,
4,2018-2019,22.0,563,Bukayo Saka,,,220,19,,0.0,...,0.0,45.0,71.0,426.0,167.0,96.0,,,,


In [6]:
def fetch_player_fixtures(player_id: int, current_season: str) -> pd.DataFrame:
    """Fetch one player's played and upcoming fixtures from the FPL API.

    Calls the ``element-summary`` endpoint, turns the ``history`` and
    ``fixtures`` entries of the JSON payload into DataFrames, reshapes the
    upcoming fixtures so their column names line up with ``history``
    (``was_home``, ``fixture``, ``round``, ``opponent_team``, ``element``) and
    stacks both on top of each other.

    Args:
        player_id: FPL element id used in the endpoint URL.
        current_season: Label written to the ``season`` column of every row.

    Returns:
        A single DataFrame with the history rows followed by the upcoming
        fixtures. Columns that only exist in ``history`` are NaN for the
        upcoming rows. The original per-frame indices are kept (not reset).

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If the API does not answer within the timeout.
    """
    r = requests.get(
        f"https://fantasy.premierleague.com/api/element-summary/{player_id}/",
        timeout=30,  # never hang the notebook on a stalled connection
    )
    # Fail loudly here instead of with a confusing JSON/KeyError further down.
    r.raise_for_status()
    data = r.json()

    history = pd.DataFrame(data["history"])
    upcoming = pd.DataFrame(data["fixtures"])

    # An empty "fixtures" list yields a frame without any columns, so the
    # reshaping below must be skipped or the column selection raises KeyError.
    if not upcoming.empty:
        upcoming["element"] = player_id
        # The opponent is the away side when the player is at home, else the home side.
        is_home = upcoming["is_home"].astype(bool)
        upcoming["opponent_team"] = upcoming["team_a"].where(is_home, upcoming["team_h"])
        upcoming = upcoming[
            ["element", "kickoff_time", "opponent_team", "is_home", "id", "event"]
        ].rename(columns={"is_home": "was_home", "id": "fixture", "event": "round"})

    # Only stack frames that actually contain rows.
    frames = [frame for frame in (history, upcoming) if not frame.empty]
    combined = pd.concat(frames) if frames else pd.DataFrame()
    combined["season"] = current_season
    return combined


def get_current_season_fpl_data(
    current_season: str,
    player_filter: str = "team == 'Arsenal' and position == 'MID' and now_cost > 60",
) -> pd.DataFrame:
    """Assemble per-fixture data for the selected players of the current season.

    Loads the base element and team tables via ``get_fpl_base_data``, narrows
    the players with ``player_filter``, downloads every selected player's
    fixtures with ``fetch_player_fixtures`` and joins the result back onto the
    element table.

    Args:
        current_season: Season label forwarded to ``fetch_player_fixtures``.
        player_filter: ``DataFrame.query`` expression applied to the element
            table. The default keeps the selection this notebook was written
            with; pass an empty string to keep every player.

    Returns:
        A DataFrame restricted to the fixed list of output columns below, one
        row per player and fixture. If no player matches the filter, an empty
        DataFrame with those columns is returned.
    """
    output_columns = [
        "season",
        "round",
        "element",
        "full_name",
        "team",
        "position",
        "fixture",
        "opponent_team",
        "opponent_team_name",
        "total_points",
        "was_home",
        "kickoff_time",
        "team_h_score",
        "team_a_score",
        "minutes",
        "goals_scored",
        "assists",
        "clean_sheets",
        "goals_conceded",
        "own_goals",
        "penalties_saved",
        "penalties_missed",
        "yellow_cards",
        "red_cards",
        "saves",
        "bonus",
        "bps",
        "influence",
        "creativity",
        "threat",
        "ict_index",
        "value",
        "transfers_balance",
        "selected",
        "transfers_in",
        "transfers_out",
        "expected_goals",
        "expected_goal_involvements",
        "expected_assists",
        "expected_goals_conceded",
    ]

    element_data, team_data, _, _ = get_fpl_base_data()
    if player_filter:
        element_data = element_data.query(player_filter)

    # pd.concat raises "No objects to concatenate" on an empty list, so an
    # empty selection is answered with an empty, correctly shaped frame.
    if element_data.empty:
        return pd.DataFrame(columns=output_columns)

    # One API request per selected player; tqdm shows the progress bar.
    per_player_frames = [
        fetch_player_fixtures(player_id, current_season=current_season)
        for player_id in tqdm(element_data["id"], desc="Fetching player history")
    ]

    current_season_data = pd.concat(per_player_frames, ignore_index=True)
    current_season_data = pd.merge(
        element_data, current_season_data, left_on="id", right_on="element"
    )
    # NOTE(review): the lookup is keyed by team_data's index, so this assumes
    # team_data is indexed by the team id used in "opponent_team" -- confirm.
    current_season_data["opponent_team_name"] = current_season_data["opponent_team"].map(
        team_data["name"].to_dict()
    )
    return current_season_data[output_columns]

# Build the 2023-2024 frame; this issues one FPL API request per selected player.
current_season_data = get_current_season_fpl_data("2023-2024")

Fetching player history: 100%|██████████| 5/5 [00:02<00:00,  1.83it/s]


In [7]:
# Show the assembled frame; future fixtures carry NaN in the per-match stat columns.
current_season_data

Unnamed: 0,season,round,element,full_name,team,position,fixture,opponent_team,opponent_team_name,total_points,...,ict_index,value,transfers_balance,selected,transfers_in,transfers_out,expected_goals,expected_goal_involvements,expected_assists,expected_goals_conceded
0,2023-2024,1,6,Kai Havertz,Arsenal,MID,2,16,Nott'm Forest,2.0,...,5.5,75.0,0.0,407159.0,0.0,0.0,0.05,0.29,0.24,1.18
1,2023-2024,2,6,Kai Havertz,Arsenal,MID,12,8,Crystal Palace,2.0,...,1.9,75.0,-63530.0,377081.0,8492.0,72022.0,0.02,0.07,0.05,1.06
2,2023-2024,3,6,Kai Havertz,Arsenal,MID,21,10,Fulham,1.0,...,4.2,74.0,-76821.0,308042.0,5577.0,82398.0,0.27,0.41,0.14,0.29
3,2023-2024,4,6,Kai Havertz,Arsenal,MID,31,14,Man Utd,2.0,...,4.1,73.0,-84114.0,228041.0,1637.0,85751.0,0.36,0.40,0.04,0.90
4,2023-2024,5,6,Kai Havertz,Arsenal,MID,43,9,Everton,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,2023-2024,34,26,Leandro Trossard,Arsenal,MID,340,20,Wolves,,...,,,,,,,,,,
186,2023-2024,35,26,Leandro Trossard,Arsenal,MID,348,18,Spurs,,...,,,,,,,,,,
187,2023-2024,36,26,Leandro Trossard,Arsenal,MID,351,3,Bournemouth,,...,,,,,,,,,,
188,2023-2024,37,26,Leandro Trossard,Arsenal,MID,365,14,Man Utd,,...,,,,,,,,,,


In [145]:
# Schema check: "df1" is the stored history (raw_data) and "df2" is the frame
# built from the live API (current_season_data).
columns_df1 = set(raw_data.columns)
columns_df2 = set(current_season_data.columns)

# Columns that exist on one side only, in each direction.
only_in_df1 = columns_df1.difference(columns_df2)
only_in_df2 = columns_df2.difference(columns_df1)

print("Columns only in df1: ", only_in_df1)
print("Columns only in df2: ", only_in_df2)


Columns only in df1:  set()
Columns only in df2:  set()


In [96]:
def refresh_fpl_names_mapping(project_root: str = ".") -> None:
    """Rebuild the CSV mapping projection-file player names to FPL names.

    Steps:
      1. Build a reference table of (web_name, short_name, position) by
         merging the latest element data from ``get_fpl_base_data`` with the
         historical ``merged_gw.csv`` file.
      2. Collect the distinct (FPL name, Team, Pos, Price) rows from every
         projection file, translating names through ``ID_Dictionary.csv``.
      3. Resolve each projection name against the reference table with
         ``fuzzy_match`` and keep one row per (Team, matched name), preferring
         exact matches and then the higher price.
      4. Write the ``pred_pts_fpl_name`` -> ``fpl_name`` pairs to
         ``fpl_names_mapping.csv``.

    Args:
        project_root: Directory the ``data/`` and ``src/`` paths are resolved
            against. Defaults to the current working directory, which matches
            the previously hard-coded ``./`` paths.

    Raises:
        FileNotFoundError: If the projections directory contains no files.
    """
    # NOTE(review): fuzzy_match is not defined in the cells visible here; it
    # must already be in scope when this function is called.
    elements_team = pd.read_csv(
        os.path.join(project_root, "data/raw/backtest_data/merged_gw.csv")
    )[["name", "team", "position"]]
    latest_elements_team, _, _, _ = get_fpl_base_data()
    # Pairs left full_name/name with right name/team; presumably the left
    # "name" column holds the team name -- confirm against get_fpl_base_data.
    elements_team = latest_elements_team.merge(
        elements_team,
        left_on=["full_name", "name"],
        right_on=["name", "team"],
        suffixes=("", "_y"),
    )
    elements_team = (
        elements_team[["web_name", "short_name", "position"]]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    projections_dir = os.path.join(
        project_root, "data/raw/theFPLkiwi/FPL_projections_22_23"
    )
    # os.listdir returns entries in arbitrary order; sorting makes the
    # keep="last" de-duplication below reproducible between runs and machines.
    files = sorted(
        os.path.join(projections_dir, entry)
        for entry in os.listdir(projections_dir)
        if os.path.isfile(os.path.join(projections_dir, entry))
    )
    if not files:
        raise FileNotFoundError(f"No projection files found in {projections_dir}")

    fpl_name_dict = pd.read_csv(
        os.path.join(project_root, "data/raw/theFPLkiwi/ID_Dictionary.csv"),
        encoding="cp1252",
    )[["Name", "FPL name"]]

    # Gather the per-file frames first and concatenate once instead of growing
    # a DataFrame inside the loop.
    per_file_rows = [
        fpl_name_dict.merge(pd.read_csv(file), on="Name")[
            ["FPL name", "Team", "Pos", "Price"]
        ].drop_duplicates()
        for file in files
    ]
    pred_pts_data = pd.concat(per_file_rows, ignore_index=True)

    pred_pts_data = pred_pts_data.rename({"FPL name": "pred_pts_fpl_name"}, axis=1)
    pred_pts_data = pred_pts_data.drop_duplicates(
        subset=["pred_pts_fpl_name", "Team", "Pos"], keep="last"
    ).reset_index(drop=True)

    tqdm.pandas(desc="Resolving FPL names in predicted pts data")
    pred_pts_data["matched"] = pred_pts_data.progress_apply(
        lambda row: fuzzy_match(row, elements_team), axis=1
    )
    pred_pts_data["same"] = (
        pred_pts_data["pred_pts_fpl_name"] == pred_pts_data["matched"]
    )
    # Sort exact matches (and higher prices) first so drop_duplicates, which
    # keeps the first occurrence, retains the best candidate per team and name.
    pred_pts_data = pred_pts_data.sort_values(["same", "Price"], ascending=False)
    pred_pts_data = pred_pts_data.drop_duplicates(["Team", "matched"])
    pred_pts_data["fpl_name"] = pred_pts_data["matched"]
    pred_pts_data = pred_pts_data[["pred_pts_fpl_name", "fpl_name"]].reset_index(
        drop=True
    )

    pred_pts_data.to_csv(
        os.path.join(
            project_root, "src/fpl/pipelines/model_pipeline/fpl_names_mapping.csv"
        ),
        index=False,
    )