In [2]:
using Graphs

g = simple_graph(5)

# Edge list: every entry is a (u, v, dist) triple.
inputs = [
    (1, 2, 10.0),
    (1, 3, 5.0),
    (2, 3, 2.0),
    (3, 2, 3.0),
    (2, 4, 1.0),
    (3, 5, 2.0),
    (4, 5, 4.0),
    (5, 4, 6.0),
    (5, 1, 7.0),
    (3, 4, 9.0),
]

ne = length(inputs)
dists = zeros(ne)

# Edges are inserted in list order, so dists[k] holds the length of the
# k-th edge that was added to g.
for (k, edge) in enumerate(inputs)
    u, v, w = edge
    add_edge!(g, u, v)
    dists[k] = w
end

# Run Dijkstra with vertex 1 passed as the last argument.
r = dijkstra_shortest_paths(g, dists, 1)

# Report the distance and the reconstructed path for vertex d.
d = 4
println(r.dists[d])
println(enumerate_paths(vertices(g), r.parent_indices, d))

;

9.0
[1,3,2,4]


In [124]:
using JLD

push!(LOAD_PATH, ".")
using Util


"""
    policy2name(tree_policy)

Return a printable name for the tree-policy description `tree_policy`.

`tree_policy` is indexed by string keys and must contain a "type" entry:

- `:UCB1` gives "UCB1(c)", where `c` is the string form of `tree_policy["c"]`.
- `:AUCB` gives "AUCB(cp)(sp1,sp2,...)", where each `sp` is the name of an
  entry of `tree_policy["SP"]` and `cp` is the name of `tree_policy["CP"]`,
  or "TSN" when no "CP" entry is present.
- Any other type (e.g. `:TS`, `:TSM`) is named by the string form of its
  type symbol.
"""
function policy2name(tree_policy)

    ptype = tree_policy["type"]

    if ptype == :UCB1
        return "UCB1(" * string(tree_policy["c"]) * ")"
    elseif ptype == :AUCB
        # Names of the sub-policies, comma separated
        sp = join([policy2name(p) for p in tree_policy["SP"]], ",")
        cp = haskey(tree_policy, "CP") ? policy2name(tree_policy["CP"]) : "TSN"
        return "AUCB(" * cp * ")(" * sp * ")"
    end

    # Policies without parameters are named after their type. Returning a
    # string here (rather than falling through with `nothing`) keeps the
    # concatenation in the AUCB branch valid for every nested policy type.
    return string(ptype)
end


"""
    processData(datafile; datadir = ".")

Load the "DATA" entry from the file `joinpath(datadir, datafile)` and print a
per-scenario summary of the experiments it contains.

Each entry of the loaded collection is keyed by a `(scenario, tree_policy)`
pair and holds an "expected_returns" collection and an "opt_dist" value. For
every scenario the function prints the optimum followed by the mean and the
standard deviation of the expected returns for a fixed list of policy names.

Both arguments accept any `AbstractString`. A `KeyError` is raised if a
scenario has no results for one of the listed policies.

NOTE(review): `load` and `neat` are not defined in this file; presumably they
come from `JLD` and `Util` respectively -- confirm.
"""
function processData(datafile::AbstractString; datadir::AbstractString = ".")

    D = load(joinpath(datadir, datafile), "DATA")

    # R[scenario] maps "optimum" and every policy name to its summary values
    R = Dict()

    for (key, experiment) in D
        scenario, tree_policy = key
        expected_returns = Array{Float64}(experiment["expected_returns"])

        if !haskey(R, scenario)
            R[scenario] = Dict()
        end
        R[scenario]["optimum"] = experiment["opt_dist"]

        tpn = policy2name(tree_policy)
        R[scenario][tpn] = Dict()
        R[scenario][tpn]["mean"] = mean(expected_returns)
        R[scenario][tpn]["std"] = std(expected_returns)
    end

    # Policies to report, in display order
    policies = ["UCB1(1.0)", "UCB1(300.0)", "TS", "TSM",
                "AUCB(TSN)(UCB1(1.0),UCB1(300.0))", "AUCB(TSN)(UCB1(1.0),TSM)"]

    for (scenario, R_) in R
        println("Scenario: ", scenario, ", Optimum: ", neat(R_["optimum"]))

        for tpn in policies
            stats = R_[tpn]
            name_col = @sprintf("%-32s", tpn)
            mean_col = @sprintf("%7s", neat(stats["mean"]))
            std_col = @sprintf("%6.2f", neat(stats["std"]))
            println("  policy: ", name_col, ", mean: ", mean_col, ", std: ", std_col)
        end

        println()
    end
end

;

In [134]:
# Print the per-scenario policy summary for data01/exp_conv.jld
processData("exp_conv.jld", datadir = "data01")

Scenario: 20539, Optimum: -833.3
  policy: UCB1(1.0)                       , mean: -1127.0, std: 122.80
  policy: UCB1(300.0)                     , mean:  -951.2, std:  91.96
  policy: TS                              , mean:  -861.6, std:  46.44
  policy: TSM                             , mean:  -857.8, std:  40.99
  policy: AUCB(TSN)(UCB1(1.0),UCB1(300.0)), mean:  -910.0, std:  77.28
  policy: AUCB(TSN)(UCB1(1.0),TSM)        , mean:  -870.4, std:  53.68

Scenario: 10978, Optimum: -858.9
  policy: UCB1(1.0)                       , mean: -1000.0, std:  87.54
  policy: UCB1(300.0)                     , mean:  -923.1, std:  54.88
  policy: TS                              , mean:  -884.0, std:  29.56
  policy: TSM                             , mean:  -885.0, std:  24.99
  policy: AUCB(TSN)(UCB1(1.0),UCB1(300.0)), mean:  -899.9, std:  32.74
  policy: AUCB(TSN)(UCB1(1.0),TSM)        , mean:  -894.1, std:  30.10

Scenario: 18337, Optimum: -989.5
  policy: UCB1(1.0)                       , mea

In [127]:
# Print the per-scenario policy summary for data01/exp_1000.jld
processData("exp_1000.jld", datadir = "data01")

Scenario: 20539, Optimum: -833.3
  policy: UCB1(1.0)                       , mean: -1126.0, std: 123.40
  policy: UCB1(300.0)                     , mean:  -975.9, std: 103.70
  policy: TS                              , mean:  -875.6, std:  61.35
  policy: TSM                             , mean:  -875.7, std:  60.48
  policy: AUCB(TSN)(UCB1(1.0),UCB1(300.0)), mean:  -918.0, std:  80.74
  policy: AUCB(TSN)(UCB1(1.0),TSM)        , mean:  -879.5, std:  61.94

Scenario: 10978, Optimum: -858.9
  policy: UCB1(1.0)                       , mean: -1000.0, std:  87.61
  policy: UCB1(300.0)                     , mean:  -935.3, std:  60.16
  policy: TS                              , mean:  -903.6, std:  35.72
  policy: TSM                             , mean:  -896.4, std:  25.10
  policy: AUCB(TSN)(UCB1(1.0),UCB1(300.0)), mean:  -908.7, std:  31.89
  policy: AUCB(TSN)(UCB1(1.0),TSM)        , mean:  -904.5, std:  32.63

Scenario: 18337, Optimum: -989.5
  policy: UCB1(1.0)                       , mea

In [125]:
# Print the per-scenario policy summary for data01/exp_100.jld
processData("exp_100.jld", datadir = "data01")

Scenario: 20539, Optimum: -833.3
  policy: UCB1(1.0)                       , mean: -1126.0, std: 125.70
  policy: UCB1(300.0)                     , mean:  -999.2, std: 113.50
  policy: TS                              , mean:  -980.8, std: 106.50
  policy: TSM                             , mean:  -975.0, std: 108.60
  policy: AUCB(TSN)(UCB1(1.0),UCB1(300.0)), mean:  -979.2, std: 101.40
  policy: AUCB(TSN)(UCB1(1.0),TSM)        , mean:  -963.8, std: 109.20

Scenario: 27270, Optimum: -855.3
  policy: UCB1(1.0)                       , mean: -1029.0, std: 104.40
  policy: UCB1(300.0)                     , mean:  -971.9, std:  87.66
  policy: TS                              , mean:  -937.1, std:  61.50
  policy: TSM                             , mean:  -941.8, std:  60.77
  policy: AUCB(TSN)(UCB1(1.0),UCB1(300.0)), mean:  -941.7, std:  54.05
  policy: AUCB(TSN)(UCB1(1.0),TSM)        , mean:  -931.7, std:  53.21

Scenario: 10978, Optimum: -858.9
  policy: UCB1(1.0)                       , mea