In [96]:
using ExcelFiles, CSVFiles, DataFrames, Distributions

In [97]:
# Read the fixture list (sheet "Sheet1" of the Excel workbook) and the team
# rankings (CSV) into DataFrames; the trailing `;` suppresses notebook output.
group_phase = DataFrame(load("data/group-phase.xlsx", "Sheet1"));
team_rankings = DataFrame(load("data/team-rankings.csv"));

In [98]:
# Preview rows 1-3, showing only the team and score columns
group_phase[1:3, [:TeamA, :ScoreA, :ScoreB, :TeamB]]

Unnamed: 0,TeamA,ScoreA,ScoreB,TeamB
1,Russia,3.0,2.0,Saudi Arabia
2,Egypt,1.0,2.0,Uruguay
3,Portugal,,,Spain


In [99]:
# Convert team rankings to dictionary for easier lookup.
# Each team name maps to a tuple (points / max_points, bias), where max_points is
# the largest value in the Points column.
max_points = maximum(team_rankings[:Points])
# Walk the three columns in lockstep instead of slicing a one-row DataFrame per team;
# building the Dict from a generator lets Julia infer concrete key/value types.
rankings = Dict(team => (points / max_points, bias) for (team, points, bias) in zip(
    team_rankings[:Team], team_rankings[:Points], team_rankings[:Bias]))
rankings["Senegal"]

(0.5378690629011553, 1.0)

In [112]:
# randomize game outcomes
# Seed the global random number generator so the simulated results are reproducible.
SEED = 11235
srand(SEED)
# Distribution that game_success draws its noise from
# (Normal() with no arguments is presumably the standard normal — see Distributions docs).
d = Normal()

# Outcome of a single game: the two team names and the score recorded for each.
# The score types are parameters (instead of the abstract `Number`) so every
# instance stores its scores with a concrete type.
struct MatchResult{A<:Number,B<:Number}
    team_a::String
    team_b::String
    score_a::A
    score_b::B
end

# Fallback constructor: accepts team names of any type convertible to `String`
# (e.g. `SubString`) and converts them when building the result.
MatchResult(team_a, team_b, score_a::Number, score_b::Number) =
    MatchResult{typeof(score_a),typeof(score_b)}(team_a, team_b, score_a, score_b)

# Simulate one game between `left` and `right` (a three-way outcome: either side
# may win, or the game is tied).
# Returns a MatchResult holding both team names and their integer scores.
function score_match(left, right)
    # Noisy performance of each side; the left team's random draw happens first.
    left_score = game_success(relative_score(left, right))
    right_score = game_success(relative_score(right, left))

    # The gap between the two performances sets the winning margin,
    # the weaker performance sets the loser's goal count.
    margin = mix(0.2, 3, abs(left_score - right_score))
    base = mix(0, 3, max(0, 0.33 + min(left_score, right_score)))
    low_goals = round(Int, base)
    high_goals = round(Int, base + margin)

    if left_score > right_score
        return MatchResult(left, right, high_goals, low_goals)
    elseif right_score > left_score
        return MatchResult(left, right, low_goals, high_goals)
    end
    # Exactly equal performances: both sides receive the lower score.
    return MatchResult(left, right, low_goals, low_goals)
end

# Strength of the left team measured against the right team: the differences of
# their `rankings` entries (first and second tuple element) fed to score_team.
function relative_score(left, right)
    left_points, left_bias = rankings[left]
    right_points, right_bias = rankings[right]
    return score_team(left_points - right_points, left_bias - right_bias)
end

# Combines personal bias and relative FIFA ranking into a single number:
# the points value plus a bias term interpolated between 0.0 and 0.25.
function score_team(points, bias)
    bias_bonus = mix(0.0, 0.25, bias)
    return points + bias_bonus
end

# Noisy performance of a team in one game: its strength plus a random draw
# from the distribution `d`, scaled by 0.175.
function game_success(strength)
    noise = rand(d) * 0.175
    return strength + noise
end

# Classify a game: :Left if team A scored more, :Right if team B scored more,
# :Tie otherwise.
function winner(game::MatchResult)
    a, b = game.score_a, game.score_b
    a > b && return :Left
    b > a && return :Right
    return :Tie
end

# Linear interpolation: returns `a` at t = 0 and `b` at t = 1.
function mix(a, b, t)
    span = b - a
    return a + span * t
end
; # don't print output

In [113]:
# Play Portugal vs Spain 50 times and count how often each outcome occurs.
left, right, ties = let tally = Dict(:Left => 0, :Right => 0, :Tie => 0)
    for _ in 1:50
        tally[winner(score_match("Portugal", "Spain"))] += 1
    end
    (tally[:Left], tally[:Right], tally[:Tie])
end
(left, right, ties)

(34, 4, 12)

In [115]:
# run simulation: one simulated result per fixture, in row order
# (a comprehension gives `outcomes` a concrete element type instead of Vector{Any})
outcomes = [score_match(a, b) for (a, b) in zip(group_phase[:TeamA], group_phase[:TeamB])]
# assign outcomes to our dataframe (overwrites the existing score columns)
group_phase[:ScoreA] = [game.score_a for game in outcomes]
group_phase[:ScoreB] = [game.score_b for game in outcomes]
# write outcomes to csv on disk
writetable("output/dw-group-phase-ranked.csv", group_phase)
# view in notebook
group_phase[:, [:TeamA, :ScoreA, :ScoreB, :TeamB]]

Unnamed: 0,TeamA,ScoreA,ScoreB,TeamB
1,Russia,1,2,Saudi Arabia
2,Egypt,1,1,Uruguay
3,Portugal,2,1,Spain
4,Morocco,1,0,Iran
5,France,2,1,Australia
6,Peru,1,1,Denmark
7,Argentina,2,0,Iceland
8,Croatia,2,0,Nigeria
9,Brazil,1,1,Switzerland
10,Costa Rica,2,1,Serbia
