In [1]:
from fastai.tabular.all import *

In [3]:
learn_class = load_learner("./20220919_model_win_loss")
learn_reg = load_learner("./20221015_model_reg")

In [4]:
# load rankings of all teams
rankings = pd.read_csv('./datasets/fifa_ranking-2022-08-25.csv')
rankings = rankings.loc[:,['rank', 'country_full', 'country_abrv', 'rank_date']]
rankings.country_full.replace("^IR Iran*", "Iran", regex=True, inplace=True)
rankings['rank_date'] = pd.to_datetime(rankings['rank_date'])

rankings = rankings.set_index(['rank_date'])\
                    .groupby(['country_full'],group_keys = False)\
                    .resample('D').first()\
                    .fillna(method='ffill')\
                    .reset_index()
rankings.tail()

Unnamed: 0,rank_date,rank,country_full,country_abrv
2285125,2022-08-21,123.0,Zimbabwe,ZIM
2285126,2022-08-22,123.0,Zimbabwe,ZIM
2285127,2022-08-23,123.0,Zimbabwe,ZIM
2285128,2022-08-24,123.0,Zimbabwe,ZIM
2285129,2022-08-25,123.0,Zimbabwe,ZIM


In [5]:
# cleanup WM 18 matches
world_cup18 = pd.read_csv("./datasets/World Cup 2018 Dataset.csv")
world_cup18 = world_cup18.loc[:, ['Team', 'Group', 'First match \nagainst', 'Second match\n against', 'Third match\n against']]
world_cup18 = world_cup18.dropna(how='all')
world_cup18 = world_cup18.replace({"IRAN": "Iran", 
                               "Costarica": "Costa Rica", 
                               "Porugal": "Portugal", 
                               "Columbia": "Colombia", 
                               "Korea" : "Korea Republic"})
world_cup18 = world_cup18.set_index('Team')
world_cup18.head()

Unnamed: 0_level_0,Group,First match \nagainst,Second match\n against,Third match\n against
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Russia,A,Saudi Arabia,Egypt,Uruguay
Saudi Arabia,A,Russia,Uruguay,Egypt
Egypt,A,Uruguay,Russia,Saudi Arabia
Uruguay,A,Egypt,Saudi Arabia,Russia
Portugal,B,Spain,Morocco,Iran


In [6]:
# define rankings at the time of the WM
world_cup_rankings = rankings.loc[(rankings['rank_date'] == rankings['rank_date'].max()) & 
                                    rankings['country_full'].isin(world_cup18.index.unique())]
world_cup_rankings = world_cup_rankings.set_index(['country_full'])
world_cup_rankings

Unnamed: 0_level_0,rank_date,rank,country_abrv
country_full,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Argentina,2022-08-25,3.0,ARG
Australia,2022-08-25,39.0,AUS
Belgium,2022-08-25,2.0,BEL
Brazil,2022-08-25,1.0,BRA
Colombia,2022-08-25,17.0,COL
Costa Rica,2022-08-25,34.0,CRC
Croatia,2022-08-25,15.0,CRO
Denmark,2022-08-25,10.0,DEN
Egypt,2022-08-25,40.0,EGY
England,2022-08-25,5.0,ENG


In [34]:
wm18_results = pd.read_csv('./datasets/WM2018Result.csv')
wm18_results = wm18_results.loc[:,["HomeTeam", "AwayTeam", "Group", "HomeTeamScore", "AwayTeamScore"]]
# wm18_results = wm18_results[wm18_results.Group.isnull() == False]
wm18_results = wm18_results[(wm18_results.Group.isnull() == True) & (wm18_results.HomeTeamScore != wm18_results.AwayTeamScore)]

wm18_results["is_won"] = (wm18_results.HomeTeamScore - wm18_results.AwayTeamScore) > 0

wm18_results

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,is_won
48,France,Argentina,,4,3,True
49,Uruguay,Portugal,,2,1,True
52,Brazil,Mexico,,2,0,True
53,Belgium,Japan,,3,2,True
54,Sweden,Switzerland,,1,0,True
56,Uruguay,France,,0,2,False
57,Brazil,Belgium,,1,2,False
58,Sweden,England,,0,2,False
60,France,Belgium,,1,0,True
61,England,Croatia,,1,2,False


In [35]:
from itertools import combinations

def getPoints(home_score, away_score, home_score_pred, away_score_pred): 
    if home_score == home_score_pred and away_score == away_score_pred:
        return 4
    elif home_score == away_score and home_score_pred == away_score_pred:
        return 2
    elif home_score - away_score == home_score_pred - away_score_pred:
        return 3
    elif home_score > away_score and home_score_pred > away_score_pred:
        return 2
    elif home_score < away_score and home_score_pred < away_score_pred:
        return 2
    else:
        return 0

margin = 0.0

wm18_results['is_win_pred'] = False
wm18_results['home_score_pred'] = 0
wm18_results['home_score_prob'] = 0.0
wm18_results['away_score_pred'] = 0
wm18_results['away_score_prob'] = 0.0
wm18_results['tipp_points'] = 0

for index, item in wm18_results.iterrows():
    home = item['HomeTeam']
    away = item['AwayTeam']

    row = pd.DataFrame(np.array([[]]))
    home_rank = world_cup_rankings.loc[home, 'rank']
    opp_rank = world_cup_rankings.loc[away, 'rank']

    row['average_rank'] = (home_rank + opp_rank) / 2
    row['rank_difference'] = home_rank - opp_rank
    row['is_stake'] = True
    row['home_team'] = home
    row['away_team'] = away

    # classification
    dl_class = learn_class.dls.test_dl(row, bs=1)
    preds_class, _ = learn_class.get_preds(dl=dl_class)
    home_win_prob = preds_class.numpy()[0][1]

    if home_win_prob >= 0.5 - margin:
        wm18_results.at[index, 'is_win_pred'] = True
        row['is_won'] = True
    else:
        row['is_won'] = False

    # regression
    dl_reg = learn_reg.dls.test_dl(row, bs=1)
    preds_reg, _ = learn_reg.get_preds(dl=dl_reg)

    # print(preds.numpy(), home, away)
    home_score_prob = preds_reg.numpy()[0][0]
    away_score_prob = preds_reg.numpy()[0][1]
    wm18_results.at[index, 'home_score_prob'] = home_score_prob
    wm18_results.at[index, 'home_score_pred'] = math.floor(home_score_prob)
    wm18_results.at[index, 'away_score_prob'] = away_score_prob
    wm18_results.at[index, 'away_score_pred'] = math.floor(away_score_prob)
    wm18_results.at[index, 'tipp_points'] = getPoints(item.HomeTeamScore, item.AwayTeamScore, math.floor(home_score_prob), math.floor(away_score_prob))

wm18_results

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,is_won,is_win_pred,home_score_pred,home_score_prob,away_score_pred,away_score_prob,tipp_points
48,France,Argentina,,4,3,True,True,2,2.412979,0,0.471401,2
49,Uruguay,Portugal,,2,1,True,False,0,0.895428,1,1.454747,0
52,Brazil,Mexico,,2,0,True,True,2,2.59628,0,0.569201,4
53,Belgium,Japan,,3,2,True,True,2,2.464831,0,0.538279,2
54,Sweden,Switzerland,,1,0,True,False,0,0.662465,1,1.347469,0
56,Uruguay,France,,0,2,False,False,0,0.567509,1,1.437714,2
57,Brazil,Belgium,,1,2,False,True,2,2.657808,0,0.530051,0
58,Sweden,England,,0,2,False,False,0,0.712266,1,1.5357,2
60,France,Belgium,,1,0,True,True,2,2.354854,0,0.454589,2
61,England,Croatia,,1,2,False,True,2,2.419039,0,0.429158,0


In [36]:
# results group stage
items = wm18_results.is_won.count()

acc_class = wm18_results[wm18_results.is_won == wm18_results.is_win_pred].is_won.count() / items
acc_reg = wm18_results.tipp_points.sum() / wm18_results.tipp_points.count()
fours = wm18_results[wm18_results.tipp_points == 4].tipp_points.count()
threes = wm18_results[wm18_results.tipp_points == 3].tipp_points.count()
twos = wm18_results[wm18_results.tipp_points == 2].tipp_points.count()
zeros = wm18_results[wm18_results.tipp_points == 0].tipp_points.count()
corr_pred_zero_pts = wm18_results[(wm18_results.is_won == wm18_results.is_win_pred) & (wm18_results.tipp_points == 0)]

print("Accuracy classification: {:.3f}".format(acc_class))
print("Average points: {:.3f}".format(acc_reg))
print("Four points: {:}, {:.3f}".format(fours, fours/items))
print("Three points: {:}, {:.3f}".format(threes, threes/items))
print("Two points: {:}, {:.3f}".format(twos, twos/items))
print("Zero points: {:}, {:.3f}".format(zeros, zeros/items))
print("Correct predicted but zero points: {:}".format(corr_pred_zero_pts.tipp_points.count()))
corr_pred_zero_pts

Accuracy classification: 0.583
Average points: 1.417
Four points: 1, 0.083
Three points: 1, 0.083
Two points: 5, 0.417
Zero points: 5, 0.417
Correct predicted but zero points: 0


Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,is_won,is_win_pred,home_score_pred,home_score_prob,away_score_pred,away_score_prob,tipp_points


In [38]:
wm18_results[(wm18_results.tipp_points == 0)]

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,is_won,is_win_pred,home_score_pred,home_score_prob,away_score_pred,away_score_prob,tipp_points
49,Uruguay,Portugal,,2,1,True,False,0,0.895428,1,1.454747,0
54,Sweden,Switzerland,,1,0,True,False,0,0.662465,1,1.347469,0
57,Brazil,Belgium,,1,2,False,True,2,2.657808,0,0.530051,0
61,England,Croatia,,1,2,False,True,2,2.419039,0,0.429158,0
62,Belgium,England,,2,0,True,False,0,0.871603,1,1.470375,0
