# Feature group extraction for a given game

In [1]:
import pandas as pd
import numpy as np
import utils
import preds_evaluation

In [5]:
# Load the pre-processed match history from its parquet file.
training_set = pd.read_parquet("datasets/processed/training_set_processed.parquet")
# Bare name as the last expression: the notebook renders the frame as the cell output.
training_set

Unnamed: 0,Sea,Lge,Date,HT,AT,HS,AS,GD,WDL,Goals
7102,00-01,CHN1,2000-03-19,Dalian Haichang,Beijing Guoan,2,0,2,W,2
7103,00-01,CHN1,2000-03-19,Qingdao,Shanghai Greenland,1,1,0,D,2
7108,00-01,CHN1,2000-03-19,Xiamen Xiaxin,Liaoning,3,0,3,W,3
7106,00-01,CHN1,2000-03-19,Shenzhen FC,Sichuan Guancheng,0,0,0,D,0
7105,00-01,CHN1,2000-03-19,Guangzhou Rich and Force,Chongqing Lifan,0,0,0,D,0
...,...,...,...,...,...,...,...,...,...,...
299041,22-23,TUN1,2023-02-08,EO Sidi Bouzid,ES Metlaoui,3,4,-1,L,7
299040,22-23,TUN1,2023-02-08,ES Sahel,CA Bizertin,0,2,-2,L,2
299039,22-23,TUN1,2023-02-08,US Ben Guerdane,Soliman,2,2,0,D,4
299037,22-23,TUN1,2023-02-08,Stade Tunisien,US Tataouine,2,1,1,W,3


In [19]:
# Fetch Real Madrid's most recent matches through the project helper.
# The helper returns a 3-tuple; only the DataFrame is inspected in this cell.
# NOTE(review): `p` is presumably a points total (extract_feature_group_game
# divides it by the number of returned rows); the meaning of `s` is not visible
# here - confirm both against utils.get_team_last_matches.
df, s, p = utils.get_team_last_matches(training_set, "Real Madrid")
df

Unnamed: 0,Sea,Lge,Date,HT,AT,HS,AS,GD,WDL,Goals
298599,22-23,SPA1,2023-01-07,Villarreal,Real Madrid,2,1,1,W,3
298623,22-23,SPA1,2023-01-22,Athletic Bilbao,Real Madrid,0,2,-2,L,2
298633,22-23,SPA1,2023-01-29,Real Madrid,Real Sociedad,0,0,0,D,0
298636,22-23,SPA1,2023-02-02,Real Madrid,Valencia,2,0,2,W,2
298642,22-23,SPA1,2023-02-05,Mallorca,Real Madrid,1,0,1,W,1


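### Feature matrix with 4 rows:
- attack: goals scored by the team in each game
- defense: goals conceded by the team in each game
- opponent strength: the opponent's average points over its last N games
- home/away: 1 if the team played at home, -1 if it played away
### and 2×N columns (N being the number of previous games considered; the columns of team1 and team2 are concatenated horizontally)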

In [27]:
def extract_feature_group_game(team1, team2, n_previous_games, verbose=True):
    ''' Builds the group of features describing a game between two teams.

    For each team, its last matches are fetched with
    utils.get_team_last_matches (from the notebook-level `training_set`) and
    four values are recorded per match. The columns of team1 come first,
    followed by the columns of team2. Within each team the columns are in the
    reverse of the order returned by the helper (most recent game first if the
    helper returns matches oldest-first, as the frames displayed in this
    notebook suggest).

    Inputs:
        - team1, team2: names of the teams, as found in the HT / AT columns
        - n_previous_games: number of previous games requested per team
        - verbose: when True (default) the four feature rows are printed

    Outputs:
        - float numpy array with 4 rows and one column per recorded game
          (2 x n_previous_games when both teams have enough history):
            row 0: goals scored by the team
            row 1: goals conceded by the team
            row 2: opponent strength (opponent points / opponent games)
            row 3: 1 if the team played at home, -1 if it played away
    '''
    feat_1, feat_2, feat_3, feat_4 = [], [], [], []

    for team in (team1, team2):
        # Four features to record for each team
        scored_t, conceded_t, opponent_t, h_advantage_t = [], [], [], []

        df, _, _ = utils.get_team_last_matches(training_set, team, n_previous_games)
        for _, game in df.iterrows():
            is_home = game.HT == team
            # Offensive strength, defensive strength and rival of this match
            if is_home:
                scored, conceded, opponent = game.HS, game.AS, game.AT
            else:
                scored, conceded, opponent = game.AS, game.HS, game.HT

            # Opponent strength (avg points in last n games).
            # NOTE(review): this uses the opponent's latest games in
            # training_set, not the games preceding this match - confirm that
            # this is the intended definition.
            df_op, _, p_op = utils.get_team_last_matches(training_set, opponent, n_previous_games)
            n_op_games = df_op.shape[0]
            # Guard against an empty opponent history instead of dividing by zero
            avg_opp_points = p_op / n_op_games if n_op_games else 0.0

            scored_t.append(scored)
            conceded_t.append(conceded)
            opponent_t.append(avg_opp_points)
            # Home advantage or not
            h_advantage_t.append(1 if is_home else -1)

        # Append team1 features with team2 features horizontally. Each team's
        # games are reversed once here, which yields the same order as
        # inserting every game at position 0.
        feat_1 += scored_t[::-1]
        feat_2 += conceded_t[::-1]
        feat_3 += opponent_t[::-1]
        feat_4 += h_advantage_t[::-1]

    if verbose:
        print(feat_1)
        print(feat_2)
        print(feat_3)
        print(feat_4)

    # The four rows always have the same length, so the array is rectangular
    return np.array([feat_1, feat_2, feat_3, feat_4], dtype=float)

In [28]:
# Build the feature group for a Real Madrid vs Eibar game, requesting 5 previous games per team.
extract_feature_group_game("Real Madrid", "Eibar", 5)

[0, 2, 0, 2, 1, 0, 0, 1, 2, 1]
[1, 0, 0, 0, 2, 2, 0, 0, 1, 0]
[1.8, 0.2, 2.0, 0.8, 1.4, 2.4, 1.6, 1.2, 0.6, 0.4]
[-1, 1, 1, -1, -1, -1, 1, -1, 1, 1]


In [34]:
# Inspect Ponferradina's most recent matches; the other two values returned by
# the helper are not needed here and are discarded.
df, _, _ = utils.get_team_last_matches(training_set, "Ponferradina")
df

Unnamed: 0,Sea,Lge,Date,HT,AT,HS,AS,GD,WDL,Goals
298878,22-23,SPA2,2023-01-06,Ponferradina,Villarreal B,2,1,1,W,3
298893,22-23,SPA2,2023-01-14,Tenerife,Ponferradina,0,0,0,D,0
298902,22-23,SPA2,2023-01-21,Ponferradina,Eibar,0,1,-1,L,1
298921,22-23,SPA2,2023-01-30,Zaragoza,Ponferradina,0,0,0,D,0
298930,22-23,SPA2,2023-02-05,Ponferradina,Santander,1,1,0,D,2


In [49]:
def predict_game(home_team: str, away_team: str, teams: list = None,
                 max_goals: int = 14, verbose: bool = True):
    ''' Predicts game outcome through Poisson.

    The goals of each side are modelled as independent Poisson variables.
    The home lambda is the home team's average goals scored times the away
    team's average goals against; the away lambda is the away team's average
    goals scored times the home team's average goals against.

    Inputs:
        - home_team, away_team: teams involved in game
        - teams: sequence of per-team stat dicts, each with the keys "name",
          "avg_goals_scored" and "avg_goals_against". When None (default) the
          notebook-level `teams` variable is looked up at call time, so the
          function never works with a stale copy of the stats.
        - max_goals: number of goal counts evaluated per side; scores from 0
          to max_goals - 1 are considered (default 14, i.e. 0..13)
        - verbose: when True (default) prints both lambdas and every exact
          result whose probability is above 0.01

    Outputs:
        - HS, AS: predicted game outcome as exact home and away team score
          (the most probable scoreline; the first one found wins ties)
        - pr_home, pr_draw, pr_away: predicted WDL probabilities, summed over
          the evaluated scorelines only

    Raises:
        - ValueError: if max_goals is lower than 1
        - NameError: if teams is None and no global `teams` is defined
        - IndexError: if home_team or away_team is not present in teams
    '''
    if max_goals < 1:
        raise ValueError(f"max_goals must be at least 1, got {max_goals}")

    if teams is None:
        # Late-bound default: resolve the global stats when the function is
        # called rather than when it is defined.
        try:
            teams = globals()["teams"]
        except KeyError:
            raise NameError("name 'teams' is not defined; pass `teams` explicitly") from None

    def _stats_for(name):
        # First stats dict whose "name" matches; IndexError is kept as the
        # failure type so existing handlers keep working.
        for entry in teams:
            if entry["name"] == name:
                return entry
        raise IndexError(f"team {name!r} not found in teams")

    # Calculate the value of lambda (λ) for both Home Team and Away Team.
    ht_stats_dict = _stats_for(home_team)
    at_stats_dict = _stats_for(away_team)

    lambda_home_team = ht_stats_dict["avg_goals_scored"] * at_stats_dict["avg_goals_against"]
    lambda_away_team = at_stats_dict["avg_goals_scored"] * ht_stats_dict["avg_goals_against"]
    if verbose:
        print(lambda_home_team, lambda_away_team)

    # Each side's goal distribution depends only on its own lambda, so it is
    # computed once instead of once per scoreline.
    home_pmf = [poisson.pmf(goals, lambda_home_team) for goals in range(max_goals)]
    away_pmf = [poisson.pmf(goals, lambda_away_team) for goals in range(max_goals)]

    prd_W, prd_D, prd_L = 0.0, 0.0, 0.0
    result_probs = []

    for x in range(max_goals):  # number of goals home team
        for y in range(max_goals):  # number of goals away team
            p = home_pmf[x] * away_pmf[y]
            result_probs.append((x, y, p))
            if verbose and p > 0.01:
                print(f"Result: {x}-{y}, Prob: {p}")
            if x == y:
                prd_D += p
            elif x > y:
                prd_W += p
            else:
                prd_L += p

    # Most probable exact scoreline
    HS, AS, _ = max(result_probs, key=itemgetter(2))

    return HS, AS, prd_W, prd_D, prd_L

In [50]:
# `poisson` and `itemgetter` are referenced inside predict_game, so these imports
# must have run before the call below.
# NOTE(review): consider moving them to the notebook's import cell at the top so
# the function does not depend on this cell having been executed.
from scipy.stats import poisson
from operator import itemgetter
# No `teams` argument is passed, so the call relies on predict_game's default.
predict_game("Real Madrid", "FC Barcelona")


2.104056 2.2349349999999997
Result: 0-0, Prob: 0.013049688698748355
Result: 0-1, Prob: 0.029165206011937145
Result: 0-2, Prob: 0.032591169849144376
Result: 0-3, Prob: 0.02427971539559916
Result: 0-4, Prob: 0.013565896431915846
Result: 1-0, Prob: 0.027457275804733665
Result: 1-1, Prob: 0.06136522670065242
Result: 1-2, Prob: 0.06857364646811132
Result: 1-3, Prob: 0.05108588085640278
Result: 1-4, Prob: 0.028543405782951125
Result: 1-5, Prob: 0.012758531320703976
Result: 2-0, Prob: 0.028885822950302356
Result: 2-1, Prob: 0.06455793671543399
Result: 2-2, Prob: 0.07214139614655424
Result: 2-3, Prob: 0.05374377706559972
Result: 2-4, Prob: 0.030028462099026515
Result: 2-5, Prob: 0.013422332188257566
Result: 3-0, Prob: 0.020259129697840456
Result: 3-1, Prob: 0.04527783803124305
Result: 3-2, Prob: 0.0505965124701781
Result: 3-3, Prob: 0.037693305532512496
Result: 3-4, Prob: 0.021060521950076443
Result: 4-0, Prob: 0.010656585848879846
Result: 4-1, Prob: 0.02381677669416627
Result: 4-2, Prob: 0.02

(2, 2, 0.37685036285361095, 0.19771185588540796, 0.4254376154420774)