In [32]:
import pandas as pd
import glob
import os


In [43]:
def load_gws(date):
    """
    Load gameweek data from CSV files for the specified dates.
    """
    gw_folder = rf"c:\Users\theoi\Documents\python stuff\FPL\FPL_dat_ pro\data\{date}\gws"
    gw_files = glob.glob(os.path.join(gw_folder, "gw*.csv"))
    df_list = [pd.read_csv(f) for f in gw_files if "merged" not in f]
    gws = pd.concat(df_list, ignore_index=True)
    return gws

def load_fixtures(date):
    """
    Load fixture data from CSV files for the specified dates.
    """
    return  pd.read_csv(rf"c:\Users\theoi\Documents\python stuff\FPL\FPL_dat_ pro\data\{date}\fixtures.csv")
    

def load_teams(date):
    """
    Load team data from CSV files for the specified dates.
    """
    teams=  pd.read_csv(rf"c:\Users\theoi\Documents\python stuff\FPL\FPL_dat_ pro\data\{date}\teams.csv")
    team_id_to_name = dict(zip(teams['id'], teams['name']))
    
    return teams,team_id_to_name

def get_team(row):
    if row['was_home']:
        return row['team_h']
    else:
        return row['team_a']

In [45]:
def processing(gws, fixtures,team_id_to_name):
    """
    Process gameweek data by merging with fixture data. 
    """
    
    merged = pd.merge(
        gws,
        fixtures,
        left_on=['kickoff_time', 'opponent_team'],
        right_on=['kickoff_time', 'team_h'],
        how='left',
        suffixes=('', '_fixture')
    ) 
    
    mask = merged['id'].isna()
    if mask.any():
        unmatched = merged[mask]
        # Only drop fixture columns that are not merge keys
        merge_keys = ['kickoff_time', 'opponent_team']
        drop_cols = [col for col in fixtures.columns if col in unmatched.columns and col not in merge_keys]
        unmatched_clean = unmatched.drop(columns=drop_cols)
        matched = pd.merge(
            unmatched_clean,
            fixtures,
            left_on=['kickoff_time', 'opponent_team'],
            right_on=['kickoff_time', 'team_a'],
            how='left',
            suffixes=('', '_fixture')
        )
        # Only assign columns that exist in both DataFrames
        assign_cols = [col for col in fixtures.columns if col in matched.columns and col in merged.columns]
        merged.loc[mask, assign_cols] = matched[assign_cols].values
    merged['team'] = merged.apply(get_team, axis=1)
    merged['team_name'] = merged['team'].map(team_id_to_name)
    merged['opponent_team_name'] = merged['opponent_team'].map(team_id_to_name)
    return merged


In [48]:


dates = ['2019-20', '2020-21', '2021-22', '2022-23', '2023-24','2024-25']
dfs= []
for date in dates:
    gws = load_gws(date)
    fixtures = load_fixtures(date)
    teams, team_id_to_name = load_teams(date)
    dfs.append(processing(gws,fixtures,team_id_to_name))

In [55]:
check = dfs[5]

check[check['team_name'] == 'Fulham']  # Example check for team name in the last DataFrame

Unnamed: 0,name,position,team,xP,assists,bonus,bps,clean_sheets,creativity,element,...,team_a,team_a_score_fixture,team_h,team_h_score_fixture,stats,team_h_difficulty,team_a_difficulty,pulse_id,team_name,opponent_team_name
32,Harry Wilson,MID,9.0,2.0,0,0,4,0,3.9,259,...,9.0,0.0,14.0,1.0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",3.0,3.0,115827.0,Fulham,Man Utd
105,Tim Ream,DEF,9.0,0.0,0,0,0,0,0.0,253,...,9.0,0.0,14.0,1.0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",3.0,3.0,115827.0,Fulham,Man Utd
135,Ryan Sessegnon,DEF,9.0,1.9,0,0,0,0,0.0,579,...,9.0,0.0,14.0,1.0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",3.0,3.0,115827.0,Fulham,Man Utd
141,Timothy Castagne,DEF,9.0,1.9,0,0,0,0,0.0,244,...,9.0,0.0,14.0,1.0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",3.0,3.0,115827.0,Fulham,Man Utd
160,Martial Godo,MID,9.0,1.0,0,0,0,0,0.0,605,...,9.0,0.0,14.0,1.0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",3.0,3.0,115827.0,Fulham,Man Utd
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27512,Reiss Nelson,MID,9.0,2.0,0,0,6,0,16.3,10,...,9.0,1.0,8.0,1.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",3.0,3.0,115913.0,Fulham,Everton
27521,Jorge Cuenca Barreno,DEF,9.0,0.3,0,0,1,0,0.0,586,...,9.0,1.0,8.0,1.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",3.0,3.0,115913.0,Fulham,Everton
27553,Rodrigo Muniz Carvalho,FWD,9.0,2.3,0,0,3,0,0.1,251,...,9.0,1.0,8.0,1.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",3.0,3.0,115913.0,Fulham,Everton
27585,Sander Berge,MID,9.0,2.0,0,0,15,0,12.5,622,...,9.0,1.0,8.0,1.0,"[{'identifier': 'goals_scored', 'a': [{'value'...",3.0,3.0,115913.0,Fulham,Everton
