In [56]:
from fastai.tabular.all import *

In [57]:
learn_win_loss = load_learner("./20220919_model_win_loss")
# learn_win_loss_draw = load_learner("./20220919_model_win_loss_draw")

In [58]:
# load rankings of all teams
rankings = pd.read_csv('./datasets/fifa_ranking-2022-08-25.csv')
rankings = rankings.loc[:,['rank', 'country_full', 'country_abrv', 'rank_date']]
rankings.country_full.replace("^IR Iran*", "Iran", regex=True, inplace=True)
rankings['rank_date'] = pd.to_datetime(rankings['rank_date'])

rankings = rankings.set_index(['rank_date'])\
                    .groupby(['country_full'],group_keys = False)\
                    .resample('D').first()\
                    .fillna(method='ffill')\
                    .reset_index()
rankings.tail()

Unnamed: 0,rank_date,rank,country_full,country_abrv
2285125,2022-08-21,123.0,Zimbabwe,ZIM
2285126,2022-08-22,123.0,Zimbabwe,ZIM
2285127,2022-08-23,123.0,Zimbabwe,ZIM
2285128,2022-08-24,123.0,Zimbabwe,ZIM
2285129,2022-08-25,123.0,Zimbabwe,ZIM


In [59]:
# cleanup WM 18 matches
world_cup18 = pd.read_csv("./datasets/World Cup 2018 Dataset.csv")
world_cup18 = world_cup18.loc[:, ['Team', 'Group', 'First match \nagainst', 'Second match\n against', 'Third match\n against']]
world_cup18 = world_cup18.dropna(how='all')
world_cup18 = world_cup18.replace({"IRAN": "Iran", 
                               "Costarica": "Costa Rica", 
                               "Porugal": "Portugal", 
                               "Columbia": "Colombia", 
                               "Korea" : "Korea Republic"})
world_cup18 = world_cup18.set_index('Team')
world_cup18.head()

Unnamed: 0_level_0,Group,First match \nagainst,Second match\n against,Third match\n against
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Russia,A,Saudi Arabia,Egypt,Uruguay
Saudi Arabia,A,Russia,Uruguay,Egypt
Egypt,A,Uruguay,Russia,Saudi Arabia
Uruguay,A,Egypt,Saudi Arabia,Russia
Portugal,B,Spain,Morocco,Iran


In [60]:
# define rankings at the time of the WM
world_cup_rankings = rankings.loc[(rankings['rank_date'] == rankings['rank_date'].max()) & 
                                    rankings['country_full'].isin(world_cup18.index.unique())]
world_cup_rankings = world_cup_rankings.set_index(['country_full'])
world_cup_rankings

Unnamed: 0_level_0,rank_date,rank,country_abrv
country_full,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Argentina,2022-08-25,3.0,ARG
Australia,2022-08-25,39.0,AUS
Belgium,2022-08-25,2.0,BEL
Brazil,2022-08-25,1.0,BRA
Colombia,2022-08-25,17.0,COL
Costa Rica,2022-08-25,34.0,CRC
Croatia,2022-08-25,15.0,CRO
Denmark,2022-08-25,10.0,DEN
Egypt,2022-08-25,40.0,EGY
England,2022-08-25,5.0,ENG


In [61]:
wm18_results = pd.read_csv('./datasets/WM2018Result.csv')
wm18_results = wm18_results.loc[:,["HomeTeam", "AwayTeam", "Group", "HomeTeamScore", "AwayTeamScore"]]
wm18_results = wm18_results[wm18_results.Group.isnull() == False]

def f(x):
    if (x == 0):
        return 1
    elif (x > 0):
        return 0
    elif (x < 0):
        return 2

# 0: home won, 1: away won, 2: draw
wm18_results['is_win_loss_draw'] = [f(xi) for xi in (wm18_results.HomeTeamScore - wm18_results.AwayTeamScore)]
# wm18_results["IsHomeWin"] = (wm18_results.HomeTeamScore - wm18_results.AwayTeamScore) > 0
# wm18_results["IsDraw"] = (wm18_results.HomeTeamScore - wm18_results.AwayTeamScore) == 0
wm18_results

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,is_win_loss_draw
0,Russia,Saudi Arabia,Group A,5,0,0
1,Egypt,Uruguay,Group A,0,1,2
2,Morocco,Iran,Group B,0,1,2
3,Portugal,Spain,Group B,3,3,1
4,France,Australia,Group C,2,1,0
5,Argentina,Iceland,Group D,1,1,1
6,Peru,Denmark,Group C,0,1,2
7,Croatia,Nigeria,Group D,2,0,0
8,Costa Rica,Serbia,Group E,0,1,2
9,Germany,Mexico,Group F,0,1,2


In [68]:
from itertools import combinations

opponents = ['First match \nagainst', 'Second match\n against', 'Third match\n against']

margin = 0.0

wm18_results['is_win_loss_draw_pred'] = 0
wm18_results['home_win_prob'] = 0.0

for index, item in wm18_results.iterrows():
    home = item['HomeTeam']
    away = item['AwayTeam']

    row = pd.DataFrame(np.array([[]]))
    home_rank = world_cup_rankings.loc[home, 'rank']
    opp_rank = world_cup_rankings.loc[away, 'rank']

    row['average_rank'] = (home_rank + opp_rank) / 2
    row['rank_difference'] = home_rank - opp_rank
    row['is_stake'] = True
    row['neutral'] = False
    row['home_team'] = home
    row['away_team'] = away

    dl = learn_win_loss.dls.test_dl(row, bs=1)
    preds, _ = learn_win_loss.get_preds(dl=dl)

    home_win_prob = preds.numpy()[0][1]
    wm18_results.at[index, 'home_win_prob'] = home_win_prob
    points = 0

    # home loss
    if home_win_prob < 0.5 - margin:
        wm18_results.at[index, 'is_win_loss_draw_pred'] = 2
    # home win
    elif home_win_prob >= 0.5 + margin:
        points = 3
        wm18_results.at[index, 'is_win_loss_draw_pred'] = 0
    # draw
    else:
        points = 1
        wm18_results.at[index, 'is_win_loss_draw_pred'] = 1

In [69]:
wm18_results["CorrectPred"] = False
wm18_results.loc[(wm18_results.is_win_loss_draw == wm18_results.is_win_loss_draw_pred), "CorrectPred"] = True
wm18_results

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,is_win_loss_draw,is_win_loss_draw_pred,home_win_prob,CorrectPred
0,Russia,Saudi Arabia,Group A,5,0,0,0,0.568519,True
1,Egypt,Uruguay,Group A,0,1,2,2,0.294415,True
2,Morocco,Iran,Group B,0,1,2,0,0.669329,False
3,Portugal,Spain,Group B,3,3,1,2,0.276423,False
4,France,Australia,Group C,2,1,0,0,0.727437,True
5,Argentina,Iceland,Group D,1,1,1,0,0.730356,False
6,Peru,Denmark,Group C,0,1,2,2,0.468928,True
7,Croatia,Nigeria,Group D,2,0,0,0,0.666107,True
8,Costa Rica,Serbia,Group E,0,1,2,2,0.494594,True
9,Germany,Mexico,Group F,0,1,2,0,0.666222,False


In [70]:
acc = wm18_results[wm18_results.CorrectPred == True].CorrectPred.count() / wm18_results.CorrectPred.count()
print("Accuracy Group Stage: {:.3f}".format(acc))

Accuracy Group Stage: 0.625


In [71]:
# home wins correct predicted
wm18_results[(wm18_results.is_win_loss_draw == 0) & (wm18_results.is_win_loss_draw == wm18_results.is_win_loss_draw_pred)].CorrectPred.count() / wm18_results[wm18_results.is_win_loss_draw == 0].CorrectPred.count()

0.7647058823529411

In [72]:
# draws correct predicted
wm18_results[(wm18_results.is_win_loss_draw == 1) & (wm18_results.is_win_loss_draw == wm18_results.is_win_loss_draw_pred)].CorrectPred.count() / wm18_results[wm18_results.is_win_loss_draw == 1].CorrectPred.count()

0.0

In [73]:
# home loss correct predicted
wm18_results[(wm18_results.is_win_loss_draw == 2) & (wm18_results.is_win_loss_draw == wm18_results.is_win_loss_draw_pred)].CorrectPred.count() / wm18_results[wm18_results.is_win_loss_draw == 2].CorrectPred.count()

0.7727272727272727

In [161]:
wm18_results[wm18_results.IsDraw == wm18_results.PredDraw].CorrectPred.count() / wm18_results.CorrectPred.count()

0.8125