In [17]:
from fastai.tabular.all import *

In [18]:
learn_win_loss = load_learner("./20221015_model_reg")

In [19]:
# load rankings of all teams
rankings = pd.read_csv('./datasets/fifa_ranking-2022-08-25.csv')
rankings = rankings.loc[:,['rank', 'country_full', 'country_abrv', 'rank_date']]
rankings.country_full.replace("^IR Iran*", "Iran", regex=True, inplace=True)
rankings['rank_date'] = pd.to_datetime(rankings['rank_date'])

rankings = rankings.set_index(['rank_date'])\
                    .groupby(['country_full'],group_keys = False)\
                    .resample('D').first()\
                    .fillna(method='ffill')\
                    .reset_index()
rankings.tail()

Unnamed: 0,rank_date,rank,country_full,country_abrv
2285125,2022-08-21,123.0,Zimbabwe,ZIM
2285126,2022-08-22,123.0,Zimbabwe,ZIM
2285127,2022-08-23,123.0,Zimbabwe,ZIM
2285128,2022-08-24,123.0,Zimbabwe,ZIM
2285129,2022-08-25,123.0,Zimbabwe,ZIM


In [20]:
# cleanup WM 18 matches
world_cup18 = pd.read_csv("./datasets/World Cup 2018 Dataset.csv")
world_cup18 = world_cup18.loc[:, ['Team', 'Group', 'First match \nagainst', 'Second match\n against', 'Third match\n against']]
world_cup18 = world_cup18.dropna(how='all')
world_cup18 = world_cup18.replace({"IRAN": "Iran", 
                               "Costarica": "Costa Rica", 
                               "Porugal": "Portugal", 
                               "Columbia": "Colombia", 
                               "Korea" : "Korea Republic"})
world_cup18 = world_cup18.set_index('Team')
world_cup18.head()

Unnamed: 0_level_0,Group,First match \nagainst,Second match\n against,Third match\n against
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Russia,A,Saudi Arabia,Egypt,Uruguay
Saudi Arabia,A,Russia,Uruguay,Egypt
Egypt,A,Uruguay,Russia,Saudi Arabia
Uruguay,A,Egypt,Saudi Arabia,Russia
Portugal,B,Spain,Morocco,Iran


In [21]:
# define rankings at the time of the WM
world_cup_rankings = rankings.loc[(rankings['rank_date'] == rankings['rank_date'].max()) & 
                                    rankings['country_full'].isin(world_cup18.index.unique())]
world_cup_rankings = world_cup_rankings.set_index(['country_full'])
world_cup_rankings

Unnamed: 0_level_0,rank_date,rank,country_abrv
country_full,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Argentina,2022-08-25,3.0,ARG
Australia,2022-08-25,39.0,AUS
Belgium,2022-08-25,2.0,BEL
Brazil,2022-08-25,1.0,BRA
Colombia,2022-08-25,17.0,COL
Costa Rica,2022-08-25,34.0,CRC
Croatia,2022-08-25,15.0,CRO
Denmark,2022-08-25,10.0,DEN
Egypt,2022-08-25,40.0,EGY
England,2022-08-25,5.0,ENG


In [22]:
wm18_results = pd.read_csv('./datasets/WM2018Result.csv')
wm18_results = wm18_results.loc[:,["HomeTeam", "AwayTeam", "Group", "HomeTeamScore", "AwayTeamScore"]]
wm18_results = wm18_results[wm18_results.Group.isnull() == False]

wm18_results["is_won"] = (wm18_results.HomeTeamScore - wm18_results.AwayTeamScore) > 0

wm18_results

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,is_won
0,Russia,Saudi Arabia,Group A,5,0,True
1,Egypt,Uruguay,Group A,0,1,False
2,Morocco,Iran,Group B,0,1,False
3,Portugal,Spain,Group B,3,3,False
4,France,Australia,Group C,2,1,True
5,Argentina,Iceland,Group D,1,1,False
6,Peru,Denmark,Group C,0,1,False
7,Croatia,Nigeria,Group D,2,0,True
8,Costa Rica,Serbia,Group E,0,1,False
9,Germany,Mexico,Group F,0,1,False


In [58]:
from itertools import combinations

def getPoints(home_score, away_score, home_score_pred, away_score_pred): 
    if home_score == home_score_pred and away_score == away_score_pred:
        return 4
    elif home_score == away_score and home_score_pred == away_score_pred:
        return 2
    elif home_score - away_score == home_score_pred - away_score_pred:
        return 3
    elif home_score > away_score and home_score_pred > away_score_pred:
        return 2
    elif home_score < away_score and home_score_pred < away_score_pred:
        return 2
    else:
        return 0

wm18_results['home_score_pred'] = 0
wm18_results['home_score_prob'] = 0.0
wm18_results['away_score_pred'] = 0
wm18_results['away_score_prob'] = 0.0
wm18_results['tipp_points'] = 0

for index, item in wm18_results.iterrows():
    home = item['HomeTeam']
    away = item['AwayTeam']

    row = pd.DataFrame(np.array([[]]))
    home_rank = world_cup_rankings.loc[home, 'rank']
    opp_rank = world_cup_rankings.loc[away, 'rank']

    row['average_rank'] = (home_rank + opp_rank) / 2
    row['rank_difference'] = home_rank - opp_rank
    row['is_stake'] = True
    row['home_team'] = home
    row['away_team'] = away
    row['is_won'] = item['is_won']

    dl = learn_win_loss.dls.test_dl(row, bs=1)
    preds, _ = learn_win_loss.get_preds(dl=dl)

    # print(preds.numpy(), home, away)
    home_score_prob = preds.numpy()[0][0]
    away_score_prob = preds.numpy()[0][1]
    wm18_results.at[index, 'home_score_prob'] = home_score_prob
    wm18_results.at[index, 'home_score_pred'] = home_score_prob.round(0)
    wm18_results.at[index, 'away_score_prob'] = away_score_prob
    wm18_results.at[index, 'away_score_pred'] = away_score_prob.round(0)
    wm18_results.at[index, 'tipp_points'] = getPoints(item.HomeTeamScore, item.AwayTeamScore, home_score_prob.round(0), away_score_prob.round(0))

wm18_results

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,is_won,home_score_pred,home_score_prob,away_score_pred,away_score_prob,tipp_points
0,Russia,Saudi Arabia,Group A,5,0,True,2,2.404817,0,0.431003,2
1,Egypt,Uruguay,Group A,0,1,False,1,0.644541,2,2.088127,3
2,Morocco,Iran,Group B,0,1,False,1,0.695365,1,1.354071,0
3,Portugal,Spain,Group B,3,3,False,1,0.758356,2,1.532779,0
4,France,Australia,Group C,2,1,True,3,2.717791,0,0.385791,2
5,Argentina,Iceland,Group D,1,1,False,1,0.80064,1,1.202904,4
6,Peru,Denmark,Group C,0,1,False,1,0.637869,1,1.301889,0
7,Croatia,Nigeria,Group D,2,0,True,2,2.207614,1,0.554852,2
8,Costa Rica,Serbia,Group E,0,1,False,1,0.63639,1,1.453297,0
9,Germany,Mexico,Group F,0,1,False,1,0.689542,2,1.607236,3


In [59]:
wm18_results.tipp_points.sum() / wm18_results.tipp_points.count()

1.8333333333333333

In [60]:
wm18_results[wm18_results.tipp_points == 4].tipp_points.count()

10

In [61]:
wm18_results[wm18_results.tipp_points == 3].tipp_points.count()

6

In [62]:
wm18_results[wm18_results.tipp_points == 2].tipp_points.count()

15

In [63]:
wm18_results[wm18_results.tipp_points == 0].tipp_points.count()

17

In [68]:
wm18_results[(wm18_results.tipp_points == 0) & (wm18_results.HomeTeamScore == wm18_results.AwayTeamScore)].tipp_points.count()

4