In [1]:
import pandas as pd

In [2]:
# Load the match table from the project-relative CSV; later cells read it as `df`.
df = pd.read_csv(filepath_or_buffer="data/data_rank.csv")

In [3]:
# Turn off pandas' chained-assignment check for the whole session
# (the library default is 'warn').
pd.set_option("mode.chained_assignment", None)


In [4]:
# Features computed per team (overall mean, plus "_l5" = mean over the team's last 5 matches): team_goals_mean, team_goals_suf_mean, team_game_points_mean, team_goals_mean_l5, team_goals_suf_mean_l5, team_game_points_mean_l5

In [5]:
def feats_calc(df, team):
    """Compute summary features for one team from the match table.

    Parameters
    ----------
    df : pandas.DataFrame
        Match table with the columns ``home_team``, ``away_team``,
        ``home_score``, ``away_score``, ``rank_home`` and ``rank_away``.
        Rows are used in their existing order, so "last 5" means the final
        five rows that involve ``team`` (presumably chronological -- confirm
        how the input file is sorted).
    team : str
        Team name compared against ``home_team`` and ``away_team``.

    Returns
    -------
    list
        Fourteen items, in this order::

            [team,
             mean goals scored, mean goals conceded, mean game points,
             the same three means over the last 5 matches,
             team rank taken from the team's last match row,
             mean(goals scored / opponent rank)    (all, last 5),
             mean(goals conceded * opponent rank)  (all, last 5),
             mean(game points / opponent rank)     (all, last 5)]

        Game points are 3 for a win, 1 for a draw and 0 for a loss. A match
        whose score is missing gets NaN points and is therefore skipped by
        the means instead of being counted as a loss. If ``team`` appears in
        no row, every numeric item is NaN.
    """
    matches = df[(df.home_team == team) | (df.away_team == team)]
    is_home = matches.home_team == team
    is_away = matches.away_team == team

    # Pick the team's own / the opponent's value per row without a Python-level
    # row loop: keep the "home" column where the team is at home, otherwise
    # fall back to the "away" column.
    team_score = matches.home_score.where(is_home, matches.away_score)
    team_suf_score = matches.away_score.where(is_home, matches.home_score)
    team_rank_by_match = matches.rank_home.where(is_home, matches.rank_away)
    opp_rank = matches.rank_away.where(is_home, matches.rank_home)

    # Goal margin from the team's point of view: the home margin is flipped on
    # rows where the team is listed as the away side.
    home_margin = matches.home_score - matches.away_score
    team_margin = home_margin.where(~is_away, -home_margin)

    # 3 / 1 / 0 for win / draw / loss. Comparisons against NaN are all False,
    # which would look like a loss, so rows without a score are masked to NaN.
    game_points = ((team_margin > 0) * 3 + (team_margin == 0) * 1).where(team_margin.notna())

    goals_by_rank = team_score / opp_rank
    goals_suf_by_rank = team_suf_score * opp_rank
    points_by_rank = game_points / opp_rank

    def mean_all_and_last5(series):
        # Mean over every match and over the final five rows only.
        return series.mean(), series.tail(5).mean()

    team_goals_mean, team_goals_mean_l5 = mean_all_and_last5(team_score)
    team_goals_suf_mean, team_goals_suf_mean_l5 = mean_all_and_last5(team_suf_score)
    team_game_points_mean, team_game_points_mean_l5 = mean_all_and_last5(game_points)
    team_goals_by_rank, team_goals_by_rank_l5 = mean_all_and_last5(goals_by_rank)
    team_goals_suf_by_rank, team_goals_suf_by_rank_l5 = mean_all_and_last5(goals_suf_by_rank)
    team_game_points_rank_mean, team_game_points_rank_mean_l5 = mean_all_and_last5(points_by_rank)

    # Rank recorded on the team's last row; undefined when there are no rows.
    team_rank = team_rank_by_match.iloc[-1] if len(team_rank_by_match) else float('nan')

    return [team, team_goals_mean, team_goals_suf_mean, team_game_points_mean,
            team_goals_mean_l5, team_goals_suf_mean_l5,
            team_game_points_mean_l5, team_rank,
            team_goals_by_rank, team_goals_by_rank_l5,
            team_goals_suf_by_rank, team_goals_suf_by_rank_l5,
            team_game_points_rank_mean, team_game_points_rank_mean_l5]
    

In [6]:
from tqdm import tqdm

In [7]:
# Collect team names from BOTH sides of the match table so that a team which
# only ever appears as the away side still gets a feature row. Home teams come
# first, in their original first-appearance order; missing names are dropped
# because they cannot match any row.
teams = pd.concat([df.home_team, df.away_team]).dropna().drop_duplicates().values

In [8]:
# One feature row per team; tqdm wraps the team list to show a progress bar.
full_feats = [feats_calc(df, t) for t in tqdm(teams)]

100%|████████████████████████████████████████████████████████████████████████████████| 195/195 [00:03<00:00, 58.55it/s]


In [9]:
# Assemble the per-team rows into a table and write it to features.csv without
# an index column. The column order must mirror the list returned by feats_calc.
pd.DataFrame(
    data=full_feats,
    columns=[
        'team_team',
        'team_goals_mean',
        'team_goals_suf_mean',
        'team_game_points_mean',
        'team_goals_mean_l5',
        'team_goals_suf_mean_l5',
        'team_game_points_mean_l5',
        'rank_team',
        'team_goals_by_rank',
        'team_goals_by_rank_l5',
        'team_goals_suf_by_rank',
        'team_goals_suf_by_rank_l5',
        'team_game_points_rank_mean',
        'team_game_points_rank_mean_l5',
    ],
).to_csv(path_or_buf='features.csv', index=False)

In [23]:
# Spot-check: list Hungary's matches (as either side) with scores and ranks.
df.loc[
    (df.home_team == 'Hungary') | (df.away_team == 'Hungary'),
    ['home_team', 'away_team', 'home_score', 'away_score', 'rank_home', 'rank_away'],
]

Unnamed: 0,home_team,away_team,home_score,away_score,rank_home,rank_away
12,Croatia,Hungary,3.0,0.0,8.0,50.0
74,Hungary,Azerbaijan,1.0,0.0,50.0,109.0
208,Hungary,Uruguay,1.0,2.0,50.0,5.0
267,Wales,Hungary,2.0,0.0,24.0,50.0
331,Turkey,Hungary,0.0,1.0,29.0,52.0
357,Hungary,Russia,2.0,3.0,52.0,38.0
407,Bulgaria,Hungary,1.0,3.0,60.0,52.0
443,Serbia,Hungary,0.0,1.0,31.0,52.0
489,Russia,Hungary,0.0,0.0,32.0,52.0
525,Hungary,Iceland,2.0,1.0,47.0,39.0
