In [2]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

from pathlib import Path
from typing import List


In [20]:
data_path = Path(".") / "Fantasy-Premier-League" / "data"

In [23]:
seasons = sorted([
    p.name for p in data_path.iterdir()
    if p.is_dir() and p.name[0].isdigit()
])
print(seasons)

['2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24', '2024-25', '2025-26']


In [24]:
seasons = ['2021-22', '2022-23', '2023-24', '2024-25', '2025-26']

In [28]:
def data_import(season_list: List[str]) -> pd.DataFrame:
    gws = np.arange(1, 39)
    dfs = []
    content = Path(".")

    for season in season_list:
        for gw in gws:
            try:
                file_name = f"gw{gw}.csv"
                path = content / "Fantasy-Premier-League" / "data" / season / "gws" / file_name

                df = pd.read_csv(path, encoding='latin1')

                df['gw'] = gw
                df['season'] = season

                if not df.empty:
                    dfs.append(df)

            except Exception as e:
                print(e)
                pass

    if not dfs:
        return pd.DataFrame()

    return pd.concat(dfs, axis=0, ignore_index=True)

In [29]:
data = data_import(season_list=seasons)

[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw25.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw26.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw27.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw28.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw29.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw30.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw31.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw32.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw33.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws\\gw34.csv'
[Errno 2] No such file or directory: 'Fantasy-Premier-League\\data\\2025-26\\gws

In [27]:
data.head()

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,fixture,goals_conceded,goals_scored,ict_index,influence,kickoff_time,minutes,opponent_team,own_goals,penalties_missed,penalties_saved,red_cards,round,saves,selected,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,gw,season,expected_assists,expected_goal_involvements,expected_goals,expected_goals_conceded,starts,modified,mng_clean_sheets,mng_draw,mng_goals_scored,mng_loss,mng_underdog_draw,mng_underdog_win,mng_win,clearances_blocks_interceptions,defensive_contribution,recoveries,tackles
0,Eric Bailly,DEF,Man Utd,0.0,0,0,0,0,0.0,286,6,0,0,0.0,0.0,2021-08-14T11:30:00Z,0,10,0,0,0,0,1,0,9363,1,5,0.0,0,0,0,0,50,True,0,1,2021-22,,,,,,,,,,,,,,,,,
1,Keinan Davis,FWD,Aston Villa,0.4,0,0,0,0,0.0,49,8,0,0,0.0,0.0,2021-08-14T14:00:00Z,0,18,0,0,0,0,1,0,169789,2,3,0.0,0,0,0,0,45,False,0,1,2021-22,,,,,,,,,,,,,,,,,
2,Ayotomiwa Dele-Bashiru,MID,Watford,0.0,0,0,0,0,0.0,394,8,0,0,0.0,0.0,2021-08-14T14:00:00Z,0,2,0,0,0,0,1,0,4092,2,3,0.0,0,0,0,0,45,True,0,1,2021-22,,,,,,,,,,,,,,,,,
3,James Ward-Prowse,MID,Southampton,2.3,0,0,20,0,30.5,341,4,3,0,5.2,21.6,2021-08-14T14:00:00Z,90,8,0,0,0,0,1,0,299682,1,3,0.0,2,0,0,0,65,False,0,1,2021-22,,,,,,,,,,,,,,,,,
4,Bruno Miguel Borges Fernandes,MID,Man Utd,4.4,0,3,61,0,35.9,277,6,1,3,20.1,106.2,2021-08-14T11:30:00Z,90,10,0,0,0,0,1,0,3381004,1,5,59.0,20,0,0,0,120,True,0,1,2021-22,,,,,,,,,,,,,,,,,


In [38]:
def dtypes_conv(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()

    if 'kickoff_time' in df.columns:
        df['kickoff_time'] = pd.to_datetime(df['kickoff_time'], format='mixed', dayfirst=False)

    str_cols = df.select_dtypes(include=['object', 'string']).columns.difference(['kickoff_time'])
    df[str_cols] = df[str_cols].astype("string")

    numeric_cols = df.select_dtypes(exclude=['string', 'datetime64[ns]', 'datetimetz']).columns
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

    return df

In [43]:
def sort_data(df:pd.DataFrame)->pd.DataFrame:
  return df.sort_values(by=["element", "kickoff_time", "gw", "season"])

In [44]:
data = dtypes_conv(df=data)
data = sort_data(df=data)

In [45]:
data.head()

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,fixture,goals_conceded,goals_scored,ict_index,influence,kickoff_time,minutes,opponent_team,own_goals,penalties_missed,penalties_saved,red_cards,round,saves,selected,team_a_score,team_h_score,threat,total_points,transfers_balance,transfers_in,transfers_out,value,was_home,yellow_cards,gw,season,expected_assists,expected_goal_involvements,expected_goals,expected_goals_conceded,starts,modified,mng_clean_sheets,mng_draw,mng_goals_scored,mng_loss,mng_underdog_draw,mng_underdog_win,mng_win,clearances_blocks_interceptions,defensive_contribution,recoveries,tackles
500,Bernd Leno,GK,Arsenal,3.6,0,0,11,0,0.0,1,1,2,0,1.2,11.8,2021-08-13 19:00:00+00:00,90,3,0,0,0,0,1,1,153122,0,2,0.0,1,0,0,0,50,False,0,1,2021-22,,,,,,,,,,,,,,,,,
1064,Bernd Leno,GK,Arsenal,1.5,0,0,17,0,0.0,1,18,2,0,2.4,24.2,2021-08-22 15:30:00+00:00,90,6,0,0,0,0,2,3,144885,2,0,0.0,2,-25816,2627,28443,50,True,0,2,2021-22,,,,,,,,,,,,,,,,,
1641,Bernd Leno,GK,Arsenal,0.8,0,0,20,0,0.0,1,24,5,0,4.3,43.0,2021-08-28 11:30:00+00:00,90,12,0,0,0,0,3,5,136757,0,5,0.0,1,-14559,5164,19723,49,False,0,3,2021-22,,,,,,,,,,,,,,,,,
2239,Bernd Leno,GK,Arsenal,2.0,0,0,0,0,0.0,1,31,0,0,0.0,0.0,2021-09-11 14:00:00+00:00,0,15,0,0,0,0,4,0,153130,0,1,0.0,0,6564,35360,28796,49,True,0,4,2021-22,,,,,,,,,,,,,,,,,
2845,Bernd Leno,GK,Arsenal,1.8,0,0,0,0,0.0,1,43,0,0,0.0,0.0,2021-09-18 14:00:00+00:00,0,5,0,0,0,0,5,0,129851,1,0,0.0,0,-23847,4822,28669,49,False,0,5,2021-22,,,,,,,,,,,,,,,,,


In [46]:
data.shape

(127465, 54)