In [None]:
#===================================================================================================================================================================#
#This code is to simulate the process and result of the 2022 world cup
#The files used in the code are "2022 Team Ability.csv","2022 schedule.csv" and "2022 World Cup Player.csv" (encoding="utf-8-sig"),which can be find in the folder.
#The model we used are "result_classification.pkl" and "goal_diff_reg.pkl", which are trained from file "analysis_training.ipynb".
#===================================================================================================================================================================#

In [34]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [35]:
def ability_diff(home,away):
    ability=pd.read_csv("2022 Team Ability.csv",encoding="utf-8-sig")
    if ability["Nation"].str.contains(home).any():
        home_ability=ability[ability["Nation"]==home].drop(["Nation"],axis=1)
    else:
        print(home," not found")
    if ability["Nation"].str.contains(away).any():
        away_ability=ability[ability["Nation"]==away].drop(["Nation"],axis=1)
    else:
        print(away," not found")
    ability_diff=home_ability.reset_index()-away_ability.reset_index()
    return ability_diff.drop(["index"],axis=1).rename(columns={"Rank":"rank_diff"})

def result_predict(home,away):

    winner=""
    loser=''
    result_model=joblib.load("result_classification.pkl")
    goal_model=joblib.load("goal_diff_reg.pkl")
    #The data for each model predict
    goal_dif_label=['overall',
        'height_cm', 'weight_kg', 'shooting', 'passing', 'dribbling',
        'defending', 'physic', 'attacking_crossing', 'attacking_finishing',
        'attacking_short_passing', 'attacking_volleys', 'skill_dribbling',
        'skill_curve', 'skill_fk_accuracy', 'skill_long_passing',
        'skill_ball_control', 'movement_reactions', 'power_shot_power',
        'power_long_shots', 'mentality_positioning', 'mentality_vision',
        'mentality_composure', 'goalkeeping_speed', 'rank_diff']
    result_label=['overall', 'height_cm',
       'weight_kg', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
       'attacking_crossing', 'attacking_finishing', 'attacking_short_passing',
       'attacking_volleys', 'skill_dribbling', 'skill_curve',
       'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control',
       'movement_reactions', 'power_shot_power', 'power_stamina',
       'power_strength', 'power_long_shots', 'mentality_interceptions',
       'mentality_positioning', 'mentality_vision', 'mentality_composure',
       'defending_standing_tackle', 'defending_sliding_tackle',
       'goalkeeping_speed']
    

    #Ability difference between home team and away team (Diff= Home - away)
    data=ability_diff(home,away)

    # result predict
    x=data[result_label]
    result=result_model.predict(x)
    result_prob=result_model.predict_proba(x)
    
    #creat new dataframe
    new=pd.DataFrame({"Result":result})
    goal_data=data[goal_dif_label]
    new=pd.concat([new,goal_data.reset_index(drop=True)],axis=1)
    
    #goal diff predict
    goal_dif=goal_model.predict(new)
    
    #Result judgment based on the result and goal diff
    if result[0]==1 and round(goal_dif[0])>=1:
        winner=home
        loser=away
    elif result[0]==1 and round(goal_dif[0])<=0:
        winner="tie"
        loser='tie'
    elif result[0]==0 and abs(round(goal_dif[0]))<1:
        winner="tie"
        loser='tie'
    elif result[0]==0:
        winner=away
        loser=home

    return winner,loser, result,result_prob,round(goal_dif[0])

In [36]:
#Group Stage
#We start the simulation same as the result of straws from World Cup 2022

schedule=pd.read_csv("2022 schedule.csv",encoding="utf-8-sig")
label=["Group","Home Team Name","Away Team Name"]
schedule=schedule[label]

table_group_stage=[]
#display(schedule)
for i in range(0,48):
    group=schedule['Group'].iloc[i]
    home=schedule["Home Team Name"].iloc[i]
    away=schedule["Away Team Name"].iloc[i]
    print(group," Game: ",i+1," ",home," vs ",away)
    winner,loser,result,result_prob,goal_diff=result_predict(home,away)
    if result[0]==1:
        prob=result_prob[0][1]
    else:
        prob=result_prob[0][0]
    print("Winner: ",winner," has probability of ",prob," win, the winner goal difference is ",abs(goal_diff),'\n')
    record=[group,home,away,winner,loser,abs(goal_diff),prob]
    table_group_stage.append(record)
table_group_stage_frame=pd.DataFrame(table_group_stage,columns=["Group","Home","Away","Winner","Loser","Winner Goal Diff",'Winner Probability'])
display(table_group_stage_frame)

Group A  Game:  1   Qatar  vs  Ecuador
Winner:  Qatar  has probability of  0.61  win, the winner goal difference is  1 

Group A  Game:  2   Senegal  vs  Netherlands
Winner:  Netherlands  has probability of  0.91  win, the winner goal difference is  1 

Group A  Game:  3   Qatar  vs  Senegal
Winner:  Senegal  has probability of  0.59  win, the winner goal difference is  1 

Group A  Game:  4   Netherlands  vs  Ecuador
Winner:  Netherlands  has probability of  0.87  win, the winner goal difference is  2 

Group A  Game:  5   Netherlands  vs  Qatar
Winner:  Netherlands  has probability of  0.6  win, the winner goal difference is  1 

Group A  Game:  6   Ecuador  vs  Senegal
Winner:  Senegal  has probability of  0.74  win, the winner goal difference is  1 

Group B  Game:  7   England  vs  IR Iran
Winner:  England  has probability of  0.75  win, the winner goal difference is  2 

Group B  Game:  8   USA  vs  Wales
Winner:  Wales  has probability of  0.59  win, the winner goal difference i

Unnamed: 0,Group,Home,Away,Winner,Loser,Winner Goal Diff,Winner Probability
0,Group A,Qatar,Ecuador,Qatar,Ecuador,1,0.61
1,Group A,Senegal,Netherlands,Netherlands,Senegal,1,0.91
2,Group A,Qatar,Senegal,Senegal,Qatar,1,0.59
3,Group A,Netherlands,Ecuador,Netherlands,Ecuador,2,0.87
4,Group A,Netherlands,Qatar,Netherlands,Qatar,1,0.6
5,Group A,Ecuador,Senegal,Senegal,Ecuador,1,0.74
6,Group B,England,IR Iran,England,IR Iran,2,0.75
7,Group B,USA,Wales,Wales,USA,1,0.59
8,Group B,Wales,IR Iran,IR Iran,Wales,1,0.53
9,Group B,England,USA,England,USA,2,0.8


In [37]:
#Group stage result analysis
#Show the record of each team and find out the advanced list

table_group_stage_frame=table_group_stage_frame.iloc[:,0:6]
nation_list=pd.read_csv("2022 World Cup Player.csv",encoding="utf-8-sig")["Nation"].to_list()
record=pd.DataFrame({"Nation":nation_list,"Group":np.zeros((32*1)),"Win":np.zeros((32*1)),"Tie":np.zeros((32*1)),"Lose":np.zeros((32*1)),"Goal Diff":np.zeros((32*1))})

for i in range(0,48):

    #winner search
    winner=table_group_stage_frame["Winner"].iloc[i]
    group=table_group_stage_frame["Group"].iloc[i]
    goal_diff=table_group_stage_frame['Winner Goal Diff'].iloc[i]
    if winner=="tie":
        winner=table_group_stage_frame["Home"].iloc[i]
        index=record[record["Nation"]==winner]["Tie"].index
        origin=record["Tie"].iloc[index[0]]
        record["Tie"].iloc[index[0]]=origin+1
        record["Group"].iloc[index[0]]=group
    else:
        index=record[record["Nation"]==winner]["Win"].index
        origin=record["Win"].iloc[index[0]]
        record["Win"].iloc[index[0]]=origin+1
        record["Group"].iloc[index[0]]=group
        record["Goal Diff"].iloc[index[0]]=record["Goal Diff"].iloc[index[0]]+goal_diff

    #loser search
    loser=table_group_stage_frame["Loser"].iloc[i]
    if loser=="tie":
        loser=table_group_stage_frame["Away"].iloc[i]
        index=record[record["Nation"]==loser]["Tie"].index
        origin=record["Tie"].iloc[index[0]]
        record["Tie"].iloc[index[0]]=origin+1
        record["Group"].iloc[index[0]]=group
    else:
        index=record[record["Nation"]==loser]["Lose"].index
        origin=record["Lose"].iloc[index[0]]
        record["Lose"].iloc[index[0]]=origin+1
        record["Group"].iloc[index[0]]=group
        record["Goal Diff"].iloc[index[0]]=record["Goal Diff"].iloc[index[0]]-goal_diff
    
record_sort=record.sort_values(["Group","Win","Goal Diff"],ascending=[True,False,False])
display("The record of each team: ",record_sort)

#find each group advanced team
group_label=["Group A","Group B","Group C","Group D","Group E","Group F","Group G","Group H"]
advancd_list=pd.DataFrame()
for label in group_label:
    team=record_sort[record_sort["Group"]==label].iloc[0:2,:]
    advancd_list=pd.concat([advancd_list,team],axis=0)
display('The advanced team in each group: ',advancd_list)

#make next round list
Round_of_16_list=pd.DataFrame()
for i in range(0,16,4):
    first=advancd_list["Nation"].iloc[0+i]
    second=advancd_list["Nation"].iloc[i+3]
    combine_1=pd.DataFrame({"Home":[first],"Away":[second]})
    third=advancd_list["Nation"].iloc[1+i]
    fourth=advancd_list["Nation"].iloc[2+i]
    combine_2=pd.DataFrame({"Home":[third],"Away":[fourth]})
    Round_of_16_list=pd.concat([Round_of_16_list,combine_1],axis=0)
    Round_of_16_list=pd.concat([Round_of_16_list,combine_2],axis=0)
display('The Bracket of Knockout Stage: ',Round_of_16_list)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  record["Win"].iloc[index[0]]=origin+1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  record["Group"].iloc[index[0]]=group
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  record["Goal Diff"].iloc[index[0]]=record["Goal Diff"].iloc[index[0]]+goal_diff
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  record["Lose"

'The record of each team: '

Unnamed: 0,Nation,Group,Win,Tie,Lose,Goal Diff
1,Netherlands,Group A,3.0,0.0,0.0,4.0
3,Senegal,Group A,2.0,0.0,1.0,1.0
2,Qatar,Group A,1.0,0.0,2.0,-1.0
0,Ecuador,Group A,0.0,0.0,3.0,-4.0
4,England,Group B,3.0,0.0,0.0,5.0
7,Wales,Group B,1.0,0.0,2.0,-1.0
5,IR Iran,Group B,1.0,0.0,2.0,-2.0
6,USA,Group B,1.0,0.0,2.0,-2.0
8,Argentina,Group C,3.0,0.0,0.0,4.0
9,Mexico,Group C,2.0,0.0,1.0,2.0


'The advanced team in each group: '

Unnamed: 0,Nation,Group,Win,Tie,Lose,Goal Diff
1,Netherlands,Group A,3.0,0.0,0.0,4.0
3,Senegal,Group A,2.0,0.0,1.0,1.0
4,England,Group B,3.0,0.0,0.0,5.0
7,Wales,Group B,1.0,0.0,2.0,-1.0
8,Argentina,Group C,3.0,0.0,0.0,4.0
9,Mexico,Group C,2.0,0.0,1.0,2.0
14,France,Group D,3.0,0.0,0.0,4.0
13,Denmark,Group D,2.0,0.0,1.0,2.0
17,Germany,Group E,3.0,0.0,0.0,3.0
19,Spain,Group E,2.0,0.0,1.0,2.0


'The Bracket of Knockout Stage: '

Unnamed: 0,Home,Away
0,Netherlands,Wales
0,Senegal,England
0,Argentina,Denmark
0,Mexico,France
0,Germany,Croatia
0,Spain,Belgium
0,Switzerland,Uruguay
0,Serbia,Portugal


In [38]:
#Round of 16 predict
table_16=[]
for i in range(0,8):
    home=Round_of_16_list["Home"].iloc[i]
    away=Round_of_16_list["Away"].iloc[i]
    print("Game: ",i+1," ",home," vs ",away)
    winner,loser,result,result_prob,goal_diff=result_predict(home,away)
    if result[0]==1:
        prob=result_prob[0][1]
    else:
        prob=result_prob[0][0]
    print("Winner: ",winner," has abillity of ",prob," win, the winner goal difference is ",abs(goal_diff),'\n')
    record=[home,away,winner,loser,abs(goal_diff),prob]
    table_16.append(record)

table_16_frame=pd.DataFrame(table_16,columns=["Home","Away","Winner","Loser","Winner Goal Diff",'Winner Probability'])
display(table_16_frame)

Game:  1   Netherlands  vs  Wales
Winner:  Netherlands  has abillity of  0.87  win, the winner goal difference is  1 

Game:  2   Senegal  vs  England
Winner:  England  has abillity of  0.92  win, the winner goal difference is  1 

Game:  3   Argentina  vs  Denmark
Winner:  Argentina  has abillity of  0.51  win, the winner goal difference is  1 

Game:  4   Mexico  vs  France
Winner:  France  has abillity of  0.88  win, the winner goal difference is  1 

Game:  5   Germany  vs  Croatia
Winner:  Germany  has abillity of  0.51  win, the winner goal difference is  2 

Game:  6   Spain  vs  Belgium
Winner:  Spain  has abillity of  0.54  win, the winner goal difference is  2 

Game:  7   Switzerland  vs  Uruguay
Winner:  Uruguay  has abillity of  0.82  win, the winner goal difference is  1 

Game:  8   Serbia  vs  Portugal
Winner:  Portugal  has abillity of  0.91  win, the winner goal difference is  1 



Unnamed: 0,Home,Away,Winner,Loser,Winner Goal Diff,Winner Probability
0,Netherlands,Wales,Netherlands,Wales,1,0.87
1,Senegal,England,England,Senegal,1,0.92
2,Argentina,Denmark,Argentina,Denmark,1,0.51
3,Mexico,France,France,Mexico,1,0.88
4,Germany,Croatia,Germany,Croatia,2,0.51
5,Spain,Belgium,Spain,Belgium,2,0.54
6,Switzerland,Uruguay,Uruguay,Switzerland,1,0.82
7,Serbia,Portugal,Portugal,Serbia,1,0.91


In [39]:
#Find out the advanced team of Quarter-finals

quarter_final_list=pd.DataFrame()
for i in range(0,8,4):
    first=table_16_frame["Winner"].iloc[0+i]
    second=table_16_frame["Winner"].iloc[i+2]
    combine_1=pd.DataFrame({"Home":[first],"Away":[second]})
    third=table_16_frame["Winner"].iloc[1+i]
    fourth=table_16_frame["Winner"].iloc[3+i]
    combine_2=pd.DataFrame({"Home":[third],"Away":[fourth]})
    quarter_final_list=pd.concat([quarter_final_list,combine_1],axis=0)
    quarter_final_list=pd.concat([quarter_final_list,combine_2],axis=0)
display(quarter_final_list)

Unnamed: 0,Home,Away
0,Netherlands,Argentina
0,England,France
0,Germany,Uruguay
0,Spain,Portugal


In [40]:
#Quarter-finals result prediction

table_4=[]
for i in range(0,4):
    home=quarter_final_list["Home"].iloc[i]
    away=quarter_final_list["Away"].iloc[i]
    print("Game: ",1+i," ",home," vs ",away)
    winner,loser,result,result_prob,goal_diff=result_predict(home,away)
    if result[0]==1:
        prob=result_prob[0][1]
    else:
        prob=result_prob[0][0]
    print("Winner: ",winner," has abillity of ",prob," win, the winner goal difference is ",abs(goal_diff),'\n')
    record=[home,away,winner,loser,abs(goal_diff),prob]
    table_4.append(record)

table_4_frame=pd.DataFrame(table_4,columns=["Home","Away","Winner","Loser","Winner Goal Diff","Winner Probability"])
display(table_4_frame)

Game:  1   Netherlands  vs  Argentina
Winner:  Argentina  has abillity of  0.8  win, the winner goal difference is  1 

Game:  2   England  vs  France
Winner:  France  has abillity of  0.76  win, the winner goal difference is  1 

Game:  3   Germany  vs  Uruguay
Winner:  Uruguay  has abillity of  0.61  win, the winner goal difference is  1 

Game:  4   Spain  vs  Portugal
Winner:  Spain  has abillity of  0.52  win, the winner goal difference is  2 



Unnamed: 0,Home,Away,Winner,Loser,Winner Goal Diff,Winner Probability
0,Netherlands,Argentina,Argentina,Netherlands,1,0.8
1,England,France,France,England,1,0.76
2,Germany,Uruguay,Uruguay,Germany,1,0.61
3,Spain,Portugal,Spain,Portugal,2,0.52


In [41]:
#Find out the advanced team of Semi-finals

semi_finals_list=pd.DataFrame()
first=table_4_frame["Winner"].iloc[0]
second=table_4_frame["Winner"].iloc[2]
combine_1=pd.DataFrame({"Home":[first],"Away":[second]})
third=table_4_frame["Winner"].iloc[1]
fourth=table_4_frame["Winner"].iloc[3]
combine_2=pd.DataFrame({"Home":[third],"Away":[fourth]})
semi_finals_list=pd.concat([semi_finals_list,combine_1],axis=0)
semi_finals_list=pd.concat([semi_finals_list,combine_2],axis=0)
display(semi_finals_list)

Unnamed: 0,Home,Away
0,Argentina,Uruguay
0,France,Spain


In [42]:
#Semi-finals result prediction

table_2=[]
for i in range(0,2):
    home=semi_finals_list["Home"].iloc[i]
    away=semi_finals_list["Away"].iloc[i]
    print("Game: ",i," ",home," vs ",away)
    winner,loser,result,result_prob,goal_diff=result_predict(home,away)
    if result[0]==1:
        prob=result_prob[0][1]
    else:
        prob=result_prob[0][0]
    print("Winner: ",winner," has abillity of ",prob," win, the winner goal difference is ",abs(goal_diff),'\n')
    record=[home,away,winner,loser,abs(goal_diff),prob]
    table_2.append(record)

table_2_frame=pd.DataFrame(table_2,columns=["Home","Away","Winner","Loser","Winner Goal Diff","Winner Probability"])
display(table_2_frame)

Game:  0   Argentina  vs  Uruguay
Winner:  Argentina  has abillity of  0.53  win, the winner goal difference is  1 

Game:  1   France  vs  Spain
Winner:  Spain  has abillity of  0.7  win, the winner goal difference is  1 



Unnamed: 0,Home,Away,Winner,Loser,Winner Goal Diff,Winner Probability
0,Argentina,Uruguay,Argentina,Uruguay,1,0.53
1,France,Spain,Spain,France,1,0.7


In [43]:
#Find out the advanced team of finals

first=table_2_frame["Winner"].iloc[0]
second=table_2_frame["Winner"].iloc[1]
championship=pd.DataFrame({"Home":[first],"Away":[second]})
third=table_2_frame["Loser"].iloc[0]
fourth=table_2_frame["Loser"].iloc[1]
match_for_third=pd.DataFrame({"Home":[third],"Away":[fourth]})

display(championship)
display(match_for_third)

Unnamed: 0,Home,Away
0,Argentina,Spain


Unnamed: 0,Home,Away
0,Uruguay,France


In [44]:
#final and match for third result prediction
final=[]
game=["Final","Third place play-off"]
a=0
for i in [championship,match_for_third]:

    home=i["Home"].iloc[0]
    away=i["Away"].iloc[0]
    print(game[a]," ",home," vs ",away)
    winner,loser,result,result_prob,goal_diff=result_predict(home,away)
    if result[0]==1:
        prob=result_prob[0][1]
    else:
        prob=result_prob[0][0]
    print("Winner: ",winner," has abillity of ",prob," win, the winner goal difference is ",abs(goal_diff),'\n')
    record=[home,away,winner,loser,abs(goal_diff),prob]
    final.append(record)
    a=a+1
final_frame=pd.DataFrame(final,columns=["Home","Away","Winner","Loser","Winner Goal Diff","Winner Probability"])
display(final_frame)

Final   Argentina  vs  Spain
Winner:  Spain  has abillity of  0.57  win, the winner goal difference is  1 

Third place play-off   Uruguay  vs  France
Winner:  France  has abillity of  0.89  win, the winner goal difference is  1 



Unnamed: 0,Home,Away,Winner,Loser,Winner Goal Diff,Winner Probability
0,Argentina,Spain,Spain,Argentina,1,0.57
1,Uruguay,France,France,Uruguay,1,0.89
