# Feature Engineering

In [1]:
from dataclasses import dataclass
from datetime import timedelta, datetime
import pandas as pd
import numpy as np

import sys
# Add the parent directory to the module search path so that the
# project-local `src` package imported below can be resolved when the
# notebook is run from its own folder.
sys.path.append('..')

# Project feature-engineering helpers, referenced as `buif` in later cells.
from src.features import build_features as buif

In [2]:
# Enable IPython's autoreload extension. Mode 2 re-imports modules before each
# cell executes, so edits to the imported project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Relative path to the interim football dataset (parsed as CSV below; the
# file name carries no extension).
FILE_PATH = '../data/interim/eo_football_interim'
# Read the dataset into a DataFrame.
# NOTE(review): the recorded preview shows an 'Unnamed: 0' column, which looks
# like a row index that was written out with the file -- confirm whether it
# should be dropped or loaded via `index_col=0`.
football = pd.read_csv(FILE_PATH)
# Preview the first rows as a quick sanity check of the load.
football.head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,HF,AF,HC,AC,HY,AY,HR,AR,SeasonLabel,Year
0,E0,2016-08-13,Burnley,Swansea,0,1,A,0,0,D,...,10,14,7,4,3,2,0,0,2016_2017,2016
1,E0,2016-08-13,Crystal Palace,West Brom,0,1,A,0,0,D,...,12,15,3,6,2,2,0,0,2016_2017,2016
2,E0,2016-08-13,Everton,Tottenham,1,1,D,1,0,H,...,10,14,5,6,0,0,0,0,2016_2017,2016
3,E0,2016-08-13,Hull,Leicester,2,1,H,1,0,H,...,8,17,5,3,2,2,0,0,2016_2017,2016
4,E0,2016-08-13,Man City,Sunderland,2,1,H,1,0,H,...,11,14,9,6,1,2,0,0,2016_2017,2016


In [4]:
# Replace the 'Date' column (as read from the CSV) with its pandas datetime
# conversion. The column is overwritten in place on `football`.
football['Date'] = pd.to_datetime(football['Date'])
# Preview the first rows after the conversion.
football.head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,HF,AF,HC,AC,HY,AY,HR,AR,SeasonLabel,Year
0,E0,2016-08-13,Burnley,Swansea,0,1,A,0,0,D,...,10,14,7,4,3,2,0,0,2016_2017,2016
1,E0,2016-08-13,Crystal Palace,West Brom,0,1,A,0,0,D,...,12,15,3,6,2,2,0,0,2016_2017,2016
2,E0,2016-08-13,Everton,Tottenham,1,1,D,1,0,H,...,10,14,5,6,0,0,0,0,2016_2017,2016
3,E0,2016-08-13,Hull,Leicester,2,1,H,1,0,H,...,8,17,5,3,2,2,0,0,2016_2017,2016
4,E0,2016-08-13,Man City,Sunderland,2,1,H,1,0,H,...,11,14,9,6,1,2,0,0,2016_2017,2016


## Features to create

These are the features engineered from the dataset. Some features are generated season-wise, while others are computed over the entire dataset. The dataset covers 2016 to 2021; the 2016 season is used as dummy data to engineer features for the remaining seasons. This 2016 data should not be used for machine learning model creation.

**Running Average Features**
- HCLPOS, ACLPOS: current league position of the home (away) team

In [5]:
# Instantiate the league-position transformer from the project's
# build_features module.
league_tr = buif.LeaguePosAdder()
# `fit` returns a value that is rebound to `football`. Judging by the outputs
# recorded in this notebook, it is the match DataFrame with HCLPOS / ACLPOS
# columns added.
# NOTE(review): because `football` is overwritten, re-running this cell feeds
# the already-augmented frame back into `fit` -- confirm that this is safe.
football = league_tr.fit(football)
# `transform` returns the league table shown in this cell's recorded output:
# a mapping of team name -> PositionStat(goals_for, goals_against, goals_diff,
# points, position).
current_league_table = league_tr.transform(football)
current_league_table

{'Man City': PositionStat(goals_for=83, goals_against=32, goals_diff=51, points=86, position=1),
 'Man United': PositionStat(goals_for=73, goals_against=44, goals_diff=29, points=74, position=2),
 'Liverpool': PositionStat(goals_for=68, goals_against=42, goals_diff=26, points=69, position=3),
 'Chelsea': PositionStat(goals_for=58, goals_against=36, goals_diff=22, points=67, position=4),
 'Leicester': PositionStat(goals_for=68, goals_against=50, goals_diff=18, points=66, position=5),
 'West Ham': PositionStat(goals_for=62, goals_against=47, goals_diff=15, points=65, position=6),
 'Tottenham': PositionStat(goals_for=68, goals_against=45, goals_diff=23, points=62, position=7),
 'Arsenal': PositionStat(goals_for=55, goals_against=39, goals_diff=16, points=61, position=8),
 'Everton': PositionStat(goals_for=47, goals_against=48, goals_diff=-1, points=59, position=9),
 'Leeds': PositionStat(goals_for=62, goals_against=54, goals_diff=8, points=59, position=10),
 'Aston Villa': PositionStat(go

In [6]:
# Display the match DataFrame; in the recorded output the HCLPOS / ACLPOS
# columns appear as the last two columns.
football

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,HC,AC,HY,AY,HR,AR,SeasonLabel,Year,HCLPOS,ACLPOS
0,E0,2016-08-13,Burnley,Swansea,0,1,A,0,0,D,...,7,4,3,2,0,0,2016_2017,2016,0,0
1,E0,2016-08-13,Crystal Palace,West Brom,0,1,A,0,0,D,...,3,6,2,2,0,0,2016_2017,2016,0,0
2,E0,2016-08-13,Everton,Tottenham,1,1,D,1,0,H,...,5,6,0,0,0,0,2016_2017,2016,0,0
3,E0,2016-08-13,Hull,Leicester,2,1,H,1,0,H,...,5,3,2,2,0,0,2016_2017,2016,0,0
4,E0,2016-08-13,Man City,Sunderland,2,1,H,1,0,H,...,9,6,1,2,0,0,2016_2017,2016,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1895,E0,2021-05-23,Liverpool,Crystal Palace,2,0,H,1,0,H,...,14,1,2,2,0,0,2020_2021,2021,5,14
1896,E0,2021-05-23,Man City,Everton,5,0,H,2,0,H,...,7,5,2,2,0,0,2020_2021,2021,1,9
1897,E0,2021-05-23,Sheffield United,Burnley,1,0,H,1,0,H,...,8,9,3,1,0,0,2020_2021,2021,20,17
1898,E0,2021-05-23,West Ham,Southampton,3,0,H,2,0,H,...,2,3,0,3,0,0,2020_2021,2021,6,15
