In [2]:
import pandas as pd

# Builds per-match training features from a match-level CSV.
#
# Pipeline: load the matches, reshape them to one row per (team, match),
# compute each team's rolling 5-match averages and rest days using only
# earlier matches, then attach those features to the original match rows
# twice -- once for the home side (h_*) and once for the away side (a_*).
#
# Columns read from the CSV: id, date, team_h, team_a, h_goals, a_goals,
# h_xg, a_xg. Any other columns are carried through to df_final unchanged.

# 1. Load and Clean
df = pd.read_csv('/Users/tarun/FPL-Prediction/understat_match_1524.csv')
df.columns = df.columns.str.strip()  # Fixes "id " space issue
df['date'] = pd.to_datetime(df['date'])

# 2. Create "Team-Centric" Data (Long Format)
# Both sides are renamed to the same generic "for"/"against" schema so that
# the home and away rows can be stacked into one table.
team_centric_cols = ['id', 'date', 'team', 'goals_for', 'goals_against', 'xg_for', 'xg_against']

home_df = df[['id', 'date', 'team_h', 'h_goals', 'a_goals', 'h_xg', 'a_xg']].copy()
home_df.columns = team_centric_cols

# For the away side "for" and "against" are swapped relative to the home side.
away_df = df[['id', 'date', 'team_a', 'a_goals', 'h_goals', 'a_xg', 'h_xg']].copy()
away_df.columns = team_centric_cols

# ignore_index=True gives every stacked row a unique label. Without it each
# original row label appears twice (once per side), which makes any later
# index-aligned assignment on team_stats fragile.
team_stats = pd.concat([home_df, away_df], ignore_index=True).sort_values(['team', 'date'])

# 3. Calculate Rolling Features (The Form)
# .shift(1) is CRITICAL: it moves every value one match later within the team,
# so the window covers only PAST matches and never the current one.
# rolling(5) yields NaN until 5 earlier matches exist for that team.
rolling_cols = ['goals_for', 'goals_against', 'xg_for', 'xg_against']

for col in rolling_cols:
    team_stats[f'avg_{col}_last_5'] = team_stats.groupby('team')[col].transform(
        lambda x: x.shift(1).rolling(5).mean()
    )

# 4. Calculate Rest Days
# A team's first match in the data has no previous date, so rest_days is NaN there.
team_stats['prev_match_date'] = team_stats.groupby('team')['date'].shift(1)
team_stats['rest_days'] = (team_stats['date'] - team_stats['prev_match_date']).dt.days

# 5. Merge Features back to original Match Data
feature_cols = ['rest_days'] + [f'avg_{col}_last_5' for col in rolling_cols]
features = team_stats[['id', 'team'] + feature_cols]

# Merge Home Stats: match on (id, home team), prefix the new columns with h_
df_final = df.merge(features, left_on=['id', 'team_h'], right_on=['id', 'team'])
df_final = df_final.rename(columns={c: f'h_{c}' for c in feature_cols}).drop(columns=['team'])

# Merge Away Stats: match on (id, away team), prefix the new columns with a_
df_final = df_final.merge(features, left_on=['id', 'team_a'], right_on=['id', 'team'])
df_final = df_final.rename(columns={c: f'a_{c}' for c in feature_cols}).drop(columns=['team'])

# 6. Final Clean Up
# Drop matches where either side lacks 5 earlier matches of history.
# The check is limited to the engineered feature columns so that a missing
# value in some unrelated column of the source CSV does not silently remove
# an otherwise usable match.
engineered_cols = [f'{side}_{c}' for side in ('h', 'a') for c in feature_cols]
df_final = df_final.dropna(subset=engineered_cols)
print("Ready for Training!")

Ready for Training!
