In [144]:
from fastai.tabular.all import *

In [145]:
learn = load_learner("./model")

In [146]:
rankings = pd.read_csv('./datasets/fifa_ranking.csv')
rankings = rankings.loc[:,['rank', 'country_full', 'country_abrv', 'cur_year_avg_weighted', 'rank_date', 
                           'two_year_ago_weighted', 'three_year_ago_weighted']]
rankings.country_full.replace("^IR Iran*", "Iran", regex=True, inplace=True)
rankings['weighted_points'] =  rankings['cur_year_avg_weighted'] + rankings['two_year_ago_weighted'] + rankings['three_year_ago_weighted']
rankings['rank_date'] = pd.to_datetime(rankings['rank_date'])

rankings = rankings.set_index(['rank_date'])\
                    .groupby(['country_full'],group_keys = False)\
                    .resample('D').first()\
                    .fillna(method='ffill')\
                    .reset_index()
rankings.head()

Unnamed: 0,rank_date,rank,country_full,country_abrv,cur_year_avg_weighted,two_year_ago_weighted,three_year_ago_weighted,weighted_points
0,2003-01-15,204.0,Afghanistan,AFG,0.0,0.0,0.0,0.0
1,2003-01-16,204.0,Afghanistan,AFG,0.0,0.0,0.0,0.0
2,2003-01-17,204.0,Afghanistan,AFG,0.0,0.0,0.0,0.0
3,2003-01-18,204.0,Afghanistan,AFG,0.0,0.0,0.0,0.0
4,2003-01-19,204.0,Afghanistan,AFG,0.0,0.0,0.0,0.0


In [147]:
world_cup18 = pd.read_csv("./datasets/World Cup 2018 Dataset.csv")
world_cup18 = world_cup18.loc[:, ['Team', 'Group', 'First match \nagainst', 'Second match\n against', 'Third match\n against']]
world_cup18 = world_cup18.dropna(how='all')
world_cup18 = world_cup18.replace({"IRAN": "Iran", 
                               "Costarica": "Costa Rica", 
                               "Porugal": "Portugal", 
                               "Columbia": "Colombia", 
                               "Korea" : "Korea Republic"})
world_cup18 = world_cup18.set_index('Team')
world_cup18.head()

Unnamed: 0_level_0,Group,First match \nagainst,Second match\n against,Third match\n against
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Russia,A,Saudi Arabia,Egypt,Uruguay
Saudi Arabia,A,Russia,Uruguay,Egypt
Egypt,A,Uruguay,Russia,Saudi Arabia
Uruguay,A,Egypt,Saudi Arabia,Russia
Portugal,B,Spain,Morocco,Iran


In [148]:
world_cup_rankings = rankings.loc[(rankings['rank_date'] == rankings['rank_date'].max()) & 
                                    rankings['country_full'].isin(world_cup18.index.unique())]
world_cup_rankings = world_cup_rankings.set_index(['country_full'])
world_cup_rankings

Unnamed: 0_level_0,rank_date,rank,country_abrv,cur_year_avg_weighted,two_year_ago_weighted,three_year_ago_weighted,weighted_points
country_full,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Argentina,2018-06-07,5.0,ARG,404.07,248.99,183.59,836.65
Australia,2018-06-07,36.0,AUS,366.6,98.16,59.79,524.55
Belgium,2018-06-07,3.0,BEL,629.98,158.94,186.58,975.5
Brazil,2018-06-07,2.0,BRA,558.95,168.06,162.38,889.39
Colombia,2018-06-07,16.0,COL,292.09,199.73,166.38,658.2
Costa Rica,2018-06-07,23.0,CRC,410.9,99.73,133.11,643.74
Croatia,2018-06-07,20.0,CRO,397.75,100.79,110.25,608.79
Denmark,2018-06-07,12.0,DEN,723.72,90.07,80.21,894.0
Egypt,2018-06-07,45.0,EGY,208.27,99.48,50.64,358.39
England,2018-06-07,12.0,ENG,476.42,174.15,103.52,754.09


In [149]:
wm18_results = pd.read_csv('./datasets/WM2018Result.csv')
wm18_results = wm18_results.loc[:,["HomeTeam", "AwayTeam", "Group", "HomeTeamScore", "AwayTeamScore"]]
wm18_results = wm18_results[wm18_results.Group.isnull() == False]

# 0: home won, 1: away won, 2: draw
wm18_results["IsHomeWin"] = (wm18_results.HomeTeamScore - wm18_results.AwayTeamScore) > 0
wm18_results["IsDraw"] = (wm18_results.HomeTeamScore - wm18_results.AwayTeamScore) == 0
wm18_results

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,IsHomeWin,IsDraw
0,Russia,Saudi Arabia,Group A,5,0,True,False
1,Egypt,Uruguay,Group A,0,1,False,False
2,Morocco,Iran,Group B,0,1,False,False
3,Portugal,Spain,Group B,3,3,False,True
4,France,Australia,Group C,2,1,True,False
5,Argentina,Iceland,Group D,1,1,False,True
6,Peru,Denmark,Group C,0,1,False,False
7,Croatia,Nigeria,Group D,2,0,True,False
8,Costa Rica,Serbia,Group E,0,1,False,False
9,Germany,Mexico,Group F,0,1,False,False


In [156]:
from itertools import combinations

opponents = ['First match \nagainst', 'Second match\n against', 'Third match\n against']

world_cup18['points'] = 0
world_cup18['total_prob'] = 0
wm18_results['PredHomeWin'] = False
wm18_results['PredDraw'] = False

margin = 0.0

for group in set(world_cup18['Group']):
    for home, away in combinations(world_cup18.query('Group =="{}"'.format(group)).index, 2):
        row = pd.DataFrame(np.array([[]]))
        home_rank = world_cup_rankings.loc[home, 'rank']
        home_points = world_cup_rankings.loc[home, 'weighted_points']
        opp_rank = world_cup_rankings.loc[away, 'rank']
        opp_points = world_cup_rankings.loc[away, 'weighted_points']
        row['average_rank'] = (home_rank + opp_rank) / 2
        row['rank_difference'] = home_rank - opp_rank
        row['point_difference'] = home_points - opp_points
        row['is_stake'] = True
        row['neutral'] = False
        row['home_team'] = home
        row['away_team'] = away

        dl = learn.dls.test_dl(row, bs=1)
        preds, _ = learn.get_preds(dl=dl)

        home_win_prob = preds.numpy()[0][1]
        world_cup18.loc[home, 'total_prob'] += home_win_prob
        world_cup18.loc[away, 'total_prob'] += 1-home_win_prob
        
        points = 0
        if home_win_prob <= 0.5 - margin:
            world_cup18.loc[away, 'points'] += 3
        if home_win_prob > 0.5 - margin:
            points = 1
        if home_win_prob >= 0.5 + margin:
            points = 3
            world_cup18.loc[home, 'points'] += 3
            wm18_results.loc[(wm18_results.HomeTeam==home) & (wm18_results.AwayTeam==away), 'PredHomeWin'] = True
        if points == 1:
            wm18_results.loc[(wm18_results.HomeTeam==home) & (wm18_results.AwayTeam==away), 'PredDraw'] = True
            world_cup18.loc[home, 'points'] += 1
            world_cup18.loc[away, 'points'] += 1

In [157]:
world_cup18

Unnamed: 0_level_0,Group,First match \nagainst,Second match\n against,Third match\n against,points,total_prob
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Russia,A,Saudi Arabia,Egypt,Uruguay,0,0.98148
Saudi Arabia,A,Russia,Uruguay,Egypt,3,1.193047
Egypt,A,Uruguay,Russia,Saudi Arabia,6,1.55173
Uruguay,A,Egypt,Saudi Arabia,Russia,9,2.273744
Portugal,B,Spain,Morocco,Iran,6,1.569453
Spain,B,Portugal,Iran,Morocco,9,2.065388
Morocco,B,Iran,Portugal,Spain,0,0.989692
Iran,B,Morocco,Spain,Portugal,3,1.375466
France,C,Australia,Peru,Denmark,9,1.908203
Australia,C,France,Denmark,Peru,6,1.455542


In [158]:
wm18_results["CorrectPred"] = False
wm18_results.loc[(wm18_results.IsHomeWin == wm18_results.PredHomeWin) & (wm18_results.IsDraw == wm18_results.PredDraw), "CorrectPred"] = True
wm18_results

Unnamed: 0,HomeTeam,AwayTeam,Group,HomeTeamScore,AwayTeamScore,IsHomeWin,IsDraw,PredHomeWin,PredDraw,CorrectPred
0,Russia,Saudi Arabia,Group A,5,0,True,False,False,False,False
1,Egypt,Uruguay,Group A,0,1,False,False,False,False,True
2,Morocco,Iran,Group B,0,1,False,False,False,False,True
3,Portugal,Spain,Group B,3,3,False,True,False,False,False
4,France,Australia,Group C,2,1,True,False,True,False,True
5,Argentina,Iceland,Group D,1,1,False,True,True,False,False
6,Peru,Denmark,Group C,0,1,False,False,False,False,True
7,Croatia,Nigeria,Group D,2,0,True,False,True,False,True
8,Costa Rica,Serbia,Group E,0,1,False,False,False,False,True
9,Germany,Mexico,Group F,0,1,False,False,True,False,False


In [159]:
acc = wm18_results[wm18_results.CorrectPred == True].CorrectPred.count() / wm18_results.CorrectPred.count()
print("Accuracy Group Stage: {:.3f}".format(acc))

Accuracy Group Stage: 0.562


In [160]:
wm18_results[wm18_results.IsHomeWin == wm18_results.PredHomeWin].CorrectPred.count() / wm18_results.CorrectPred.count()

0.6666666666666666

In [161]:
wm18_results[wm18_results.IsDraw == wm18_results.PredDraw].CorrectPred.count() / wm18_results.CorrectPred.count()

0.8125