In [2]:
using JLD
using PyPlot


function policy2name(tree_policy::ASCIIString; bWideLabel::Bool = false)
    
    tps = eval(parse(replace(tree_policy, "anonymous function", "")))
    
    if !(typeof(tps) <: Array)
        tps = Any[tps]
    end
    
    tpn = ""
    MS_str = ""
    
    for tp in tps
        scale_str = ""
        if haskey(tp, "bScale") && tp["bScale"] == true
            scale_str = "s"
        end

        if tp["type"] == :UCB1 || tp["type"] == :UCB1_
            if haskey(tp, "c")
                tpn = string(tp["type"]) * scale_str * "(" * string(tp["c"]) * ")"
            else
                tpn = string(tp["type"]) * scale_str
            end
        elseif tp["type"] == :TS || tp["type"] == :TSM
            tpn = string(tp["type"])
        elseif tp["type"] == :AUCB
            sp = ""
            for i = 1:length(tp["SP"])
                sp *= policy2name(string(tp["SP"][i]))
                if i != length(tp["SP"])
                    sp *= ","
                end
            end
            if bWideLabel
                tpn = "A-UCB" * scale_str * "(" * sp * ")"
            else
                tpn = "A-UCB" * scale_str
            end
        elseif tp["type"] == :MS
            if bWideLabel
                MS_str = " MS" * "(" * string(tp["L"]) * "," * string(tp["N"]) * ")"
            else
                MS_str = " MS"
            end
        elseif tp["type"] == :UCB1withCE
            tpn = "UCB1(CE)"
        end
    end
    
    return tpn * MS_str
end


function processDataByScenario(datafile::ASCIIString; datadir::ASCIIString = ".", bWideLabel::Bool = false)
    
    D = load(joinpath(datadir, datafile));
    
    Scenarios = D["Scenarios"]
    TreePolicies = D["TreePolicies"]
    Results = D["Results"]
    
    R = Dict()
    
    for (key, results) in Results
        scenario, tree_policy = key
        returns = results["returns"]
        if !haskey(R, scenario)
            R[scenario] = Dict()
        end
        tpn = policy2name(tree_policy, bWideLabel = bWideLabel)
        R[scenario][tpn] = Dict()
        R[scenario][tpn]["mean"] = mean(returns)
        R[scenario][tpn]["std"] = std(returns)
        R[scenario][tpn]["N"] = results["N"]
        R[scenario][tpn]["mean std"] = R[scenario][tpn]["std"] / sqrt(results["N"])
        R[scenario][tpn]["RE"] = R[scenario][tpn]["mean std"] / abs(R[scenario][tpn]["mean"])
    end
    
    for scenario in Scenarios
        R_ = R[scenario]
        println("Scenario: ", scenario)
        for tree_policy in TreePolicies[scenario]
            tpn = policy2name(tree_policy, bWideLabel = bWideLabel)
            if bWideLabel
                println("  policy: ", @sprintf("%-36s", tpn), ", mean: ", @sprintf("%9.2f", R_[tpn]["mean"]), ", RE: ", @sprintf("%8.2f", R_[tpn]["RE"]))
            else
                println("  policy: ", @sprintf("%-16s", tpn), ", mean: ", @sprintf("%9.2f", R_[tpn]["mean"]), ", RE: ", @sprintf("%8.2f", R_[tpn]["RE"]))
            end
        end
        println()
    end
end


function processDataByTreePolicy(datafile::ASCIIString; datadir::ASCIIString = ".", bWideLabel::Bool = false, filter::Vector{Int64} = Int64[], debug::Int64 = 0)

    D = load(joinpath(datadir, datafile));
    
    Scenarios = D["Scenarios"]
    TreePolicies = D["TreePolicies"]
    Results = D["Results"]
    
    TPN_list = map(x -> policy2name(x, bWideLabel = bWideLabel), TreePolicies[Scenarios[1]])
    
    R = Dict()
    for scenario in Scenarios
        R[scenario] = Dict()
    end
    
    for (key, results) in Results
        scenario, tree_policy = key
        returns = results["returns"]
        tpn = policy2name(tree_policy, bWideLabel = bWideLabel)
        R[scenario]["opt"] = results["opt_return"]
        #R[scenario][tpn] = mean((results["opt_return"] - returns) ./ abs(results["opt_return"]))
        R[scenario][tpn] = (results["opt_return"] - returns) ./ abs(results["opt_return"])
    end
    
    if debug > 0
        for scenario in Scenarios
            println("Scenario: ", scenario, ", opt: ", @sprintf("%.2f", R[scenario]["opt"]))
            for tpn in TPN_list
                if bWideLabel
                    println("  policy: ", @sprintf("%-36s", tpn), ", value: ", @sprintf("%.2f", R[scenario][tpn]))
                else
                    println("  policy: ", @sprintf("%-16s", tpn), ", value: ", @sprintf("%.2f", R[scenario][tpn]))
                end
            end
            println()
        end
        println()
    end
    
    C = Dict()
    for tpn in TPN_list
        C[tpn] = Float64[]
    end
    
    for scenario in Scenarios
        if !(scenario in filter)
            for tpn in TPN_list
                #push!(C[tpn], R[scenario][tpn])
                append!(C[tpn], R[scenario][tpn])
            end
        end
    end
    
    for tpn in TPN_list
        if bWideLabel
            println("policy: ", @sprintf("%-36s", tpn), ", N: ", @sprintf("%4d", length(C[tpn])), ", mean: ", @sprintf("%12.6f", mean(C[tpn])), ", 95% conf. int.: ", @sprintf("%12.6f", 1.96 * std(C[tpn]) / length(C[tpn])))
        else
            println("policy: ", @sprintf("%-16s", tpn), ", N: ", @sprintf("%4d", length(C[tpn])), ", mean: ", @sprintf("%12.6f", mean(C[tpn])), ", 95% conf. int.: ", @sprintf("%12.6f", 1.96 * std(C[tpn]) / length(C[tpn])))
        end
    end
end


function processDataByTreePolicyToRatio(datafile::ASCIIString; datadir::ASCIIString = ".", TPN_list::Vector{ASCIIString} = ASCIIString[], ref::ASCIIString = "mean", bWideLabel::Bool = false, filter::Vector{Int64} = Int64[], debug::Int64 = 0)
    
    D = load(joinpath(datadir, datafile));
    
    Scenarios = D["Scenarios"]
    TreePolicies = D["TreePolicies"]
    Results = D["Results"]
    
    if length(TPN_list) == 0
        TPN_list = map(x -> policy2name(x, bWideLabel = bWideLabel), TreePolicies[Scenarios[1]])
    end
    
    R = Dict()
    for scenario in Scenarios
        R[scenario] = Dict()
    end
    
    for (key, results) in Results
        scenario, tree_policy = key
        returns = results["returns"]
        tpn = policy2name(tree_policy, bWideLabel = bWideLabel)
        R[scenario][tpn] = Dict()
        R[scenario][tpn]["mean"] = mean(returns)
        R[scenario][tpn]["std"] = std(returns)
    end

    K = Dict()
    for scenario in Scenarios
        K[scenario] = Dict()
    end
    
    for scenario in Scenarios
        if ref == "mean"
            x = 0
            for tpn in TPN_list
                x += R[scenario][tpn]["mean"]
            end
            x /= length(TPN_list)
            K[scenario]["ref"] = x
        else
            K[scenario]["ref"] = R[scenario][ref]["mean"]
        end
        for tpn in TPN_list
            K[scenario][tpn] = (R[scenario][tpn]["mean"] - K[scenario]["ref"]) / abs(K[scenario]["ref"])
        end
    end
    
    if debug > 0
        for scenario in Scenarios
            println("Scenario: ", scenario, ", mean: ", @sprintf("%.2f", K[scenario]["ref"]))
            for tpn in TPN_list
                if bWideLabel
                    println("  policy: ", @sprintf("%-36s", tpn), ", value: ", @sprintf("%.2f", K[scenario][tpn]))
                else
                    println("  policy: ", @sprintf("%-16s", tpn), ", value: ", @sprintf("%.2f", K[scenario][tpn]))
                end
            end
            println()
        end
        println()
    end
    
    C = Dict()
    for tpn in TPN_list
        C[tpn] = Float64[]
    end
    
    for scenario in Scenarios
        if !(scenario in filter)
            for tpn in TPN_list
                push!(C[tpn], K[scenario][tpn])
            end
        end
    end
    
    for tpn in TPN_list
        if bWideLabel
            println("policy: ", @sprintf("%-36s", tpn), ", N: ", @sprintf("%4d", length(C[tpn])), ", mean: ", @sprintf("%12.6f", mean(C[tpn])), ", 95% conf. int.: ", @sprintf("%12.6f", 1.96 * std(C[tpn]) / length(C[tpn])))
        else
            println("policy: ", @sprintf("%-16s", tpn), ", N: ", @sprintf("%4d", length(C[tpn])), ", mean: ", @sprintf("%12.6f", mean(C[tpn])), ", 95% conf. int.: ", @sprintf("%12.6f", 1.96 * std(C[tpn]) / length(C[tpn])))
        end
    end
end


function processDataByTreePolicyToRatio2(datafile::ASCIIString; datadir::ASCIIString = ".", TPN_list::Vector{ASCIIString} = ASCIIString[], ref::ASCIIString = "mean", bWideLabel::Bool = false, filter::Vector{Int64} = Int64[], debug::Int64 = 0)
    
    D = load(joinpath(datadir, datafile));
    
    Scenarios = D["Scenarios"]
    TreePolicies = D["TreePolicies"]
    Results = D["Results"]
    
    if length(TPN_list) == 0
        TPN_list = map(x -> policy2name(x, bWideLabel = bWideLabel), TreePolicies[Scenarios[1]])
    end
    
    K = Dict()
    for scenario in Scenarios
        K[scenario] = Dict()
    end
    
    for (key, results) in Results
        scenario, tree_policy = key
        returns = results["returns"]
        tpn = policy2name(tree_policy, bWideLabel = bWideLabel)
        K[scenario][tpn] = returns
        K[scenario]["N"] = length(returns)
    end
    
    for scenario in Scenarios
        K[scenario]["mean"] = zeros(K[scenario]["N"])
        for tpn in TPN_list
            K[scenario]["mean"] += K[scenario][tpn]
        end
        K[scenario]["mean"] /= length(TPN_list)
    end
    
    R = Dict()
    for scenario in Scenarios
        R[scenario] = Dict()
    end
    
    for scenario in Scenarios
        for tpn in TPN_list
            R[scenario][tpn] = mean((K[scenario][tpn] - K[scenario]["mean"]) ./ abs(K[scenario]["mean"]))
        end
    end
    
    if debug > 0
        for scenario in Scenarios
            println("Scenario: ", scenario)
            for tpn in TPN_list
                if bWideLabel
                    println("  policy: ", @sprintf("%-36s", tpn), ", value: ", @sprintf("%12.6f", R[scenario][tpn]))
                else
                    println("  policy: ", @sprintf("%-16s", tpn), ", value: ", @sprintf("%12.6f", R[scenario][tpn]))
                end
            end
            println()
        end
        println()
    end
    
    C = Dict()
    for tpn in TPN_list
        C[tpn] = Float64[]
    end
    
    for scenario in Scenarios
        if !(scenario in filter)
            for tpn in TPN_list
                push!(C[tpn], R[scenario][tpn])
            end
        end
    end
    
    for tpn in TPN_list
        if bWideLabel
            println("policy: ", @sprintf("%-36s", tpn), ", N: ", @sprintf("%4d", length(C[tpn])), ", mean: ", @sprintf("%12.6f", mean(C[tpn])), ", 95% conf. int.: ", @sprintf("%12.6f", 1.96 * std(C[tpn]) / length(C[tpn])))
        else
            println("policy: ", @sprintf("%-16s", tpn), ", N: ", @sprintf("%4d", length(C[tpn])), ", mean: ", @sprintf("%12.6f", mean(C[tpn])), ", 95% conf. int.: ", @sprintf("%12.6f", 1.96 * std(C[tpn]) / length(C[tpn])))
        end
    end
end

;

## Enhanced Tree Policies

In [71]:
processDataByScenario("exp.jld", datadir = "data_ijcai_02");

Scenario: 26229
  policy: UCB1(100)       , mean:     35.79, RE:     0.07
  policy: UCB1(10000)     , mean:     29.32, RE:     0.05
  policy: TS              , mean:     54.95, RE:     0.08
  policy: TSM             , mean:     32.84, RE:     0.07
  policy: A-UCB           , mean:     31.77, RE:     0.07
  policy: UCB1(CE)        , mean:   -162.95, RE:     1.23

Scenario: 13157
  policy: UCB1(100)       , mean:  -1513.07, RE:     0.38
  policy: UCB1(10000)     , mean:   -213.03, RE:     1.88
  policy: TS              , mean:    197.78, RE:     0.03
  policy: TSM             , mean:   -193.49, RE:     1.45
  policy: A-UCB           , mean:    182.90, RE:     0.04
  policy: UCB1(CE)        , mean:  -1424.71, RE:     0.42

Scenario: 10978
  policy: UCB1(100)       , mean:    230.90, RE:     0.00
  policy: UCB1(10000)     , mean:    230.29, RE:     0.00
  policy: TS              , mean:    228.88, RE:     0.00
  policy: TSM             , mean:    229.79, RE:     0.00
  policy: A-UCB       

In [72]:
processDataByTreePolicy("exp.jld", datadir = "data_ijcai_02")

policy: UCB1(100)       , N: 9800, mean:     8.144030, 95% conf. int.:     0.005475
policy: UCB1(10000)     , N: 9800, mean:     5.056054, 95% conf. int.:     0.004149
policy: TS              , N: 9800, mean:     5.367236, 95% conf. int.:     0.004306
policy: TSM             , N: 9800, mean:     5.125547, 95% conf. int.:     0.004131
policy: A-UCB           , N: 9800, mean:     5.554401, 95% conf. int.:     0.004330
policy: UCB1(CE)        , N: 9800, mean:     7.468605, 95% conf. int.:     0.005135


In [73]:
processDataByTreePolicyToRatio("exp.jld", datadir = "data_ijcai_02")

policy: UCB1(100)       , N:   98, mean:    -0.488970, 95% conf. int.:     0.063415
policy: UCB1(10000)     , N:   98, mean:     0.200877, 95% conf. int.:     0.028401
policy: TS              , N:   98, mean:     0.371706, 95% conf. int.:     0.035074
policy: TSM             , N:   98, mean:     0.287599, 95% conf. int.:     0.024773
policy: A-UCB           , N:   98, mean:     0.325236, 95% conf. int.:     0.023945
policy: UCB1(CE)        , N:   98, mean:    -0.696447, 95% conf. int.:     0.095470


In [78]:
processDataByTreePolicyToRatio2("exp.jld", datadir = "data_ijcai_02", filter = [20539])

policy: UCB1(100)       , N:   97, mean:     0.483250, 95% conf. int.:     0.037127
policy: UCB1(10000)     , N:   97, mean:    -0.214009, 95% conf. int.:     0.019271
policy: TS              , N:   97, mean:    -0.177042, 95% conf. int.:     0.017220
policy: TSM             , N:   97, mean:    -0.196406, 95% conf. int.:     0.014108
policy: A-UCB           , N:   97, mean:    -0.135429, 95% conf. int.:     0.015282
policy: UCB1(CE)        , N:   97, mean:     0.239636, 95% conf. int.:     0.039399


## Splitting Parameter Study

In [122]:
processDataByScenario("exp_MS_params.jld", datadir = "data_ijcai", bWideLabel = true)

Scenario: 26229
  policy: UCB1(10000)                         , mean:     29.32, RE:     0.05
  policy: UCB1(10000) MS([500.0],[2])         , mean:     30.46, RE:     0.05
  policy: UCB1(10000) MS([500.0],[4])         , mean:     33.19, RE:     0.08
  policy: UCB1(10000) MS([500.0,200.0],[2,2]) , mean:     30.78, RE:     0.06

Scenario: 13157
  policy: UCB1(10000)                         , mean:   -213.03, RE:     1.88
  policy: UCB1(10000) MS([500.0],[2])         , mean:   -522.29, RE:     0.74
  policy: UCB1(10000) MS([500.0],[4])         , mean:   -309.63, RE:     0.84
  policy: UCB1(10000) MS([500.0,200.0],[2,2]) , mean:   -426.85, RE:     0.74

Scenario: 10978
  policy: UCB1(10000)                         , mean:    230.29, RE:     0.00
  policy: UCB1(10000) MS([500.0],[2])         , mean:    230.22, RE:     0.00
  policy: UCB1(10000) MS([500.0],[4])         , mean:    231.15, RE:     0.00
  policy: UCB1(10000) MS([500.0,200.0],[2,2]) , mean:    230.91, RE:     0.00

Scenario: 183

In [123]:
processDataByTreePolicy("exp_MS_params.jld", datadir = "data_ijcai", bWideLabel = true, filter = [29593, 10521, 18272, 20328, 11642])

policy: UCB1(10000)                         , N: 9500, mean:     1.744272, 95% conf. int.:     0.002032
policy: UCB1(10000) MS([500.0],[2])         , N: 9500, mean:     1.891262, 95% conf. int.:     0.002115
policy: UCB1(10000) MS([500.0],[4])         , N: 9500, mean:     2.231557, 95% conf. int.:     0.002412
policy: UCB1(10000) MS([500.0,200.0],[2,2]) , N: 9500, mean:     1.882971, 95% conf. int.:     0.002153


In [124]:
processDataByTreePolicy("exp_MS_params.jld", datadir = "data_ijcai", bWideLabel = true)

policy: UCB1(10000)                         , N: 10000, mean:     4.961729, 95% conf. int.:     0.004028
policy: UCB1(10000) MS([500.0],[2])         , N: 10000, mean:     5.081115, 95% conf. int.:     0.003974
policy: UCB1(10000) MS([500.0],[4])         , N: 10000, mean:     5.497888, 95% conf. int.:     0.004152
policy: UCB1(10000) MS([500.0,200.0],[2,2]) , N: 10000, mean:     5.200870, 95% conf. int.:     0.004048


In [108]:
processDataByTreePolicyToRatio("exp_MS_params.jld", datadir = "data_ijcai", bWideLabel = true, filter = [29593, 10521, 18272])

policy: UCB1(10000)                         , N:   46, mean:    -0.072989, 95% conf. int.:     0.018833
policy: UCB1(10000) MS([500.0],[2])         , N:   46, mean:     0.077997, 95% conf. int.:     0.021702
policy: UCB1(10000) MS([500.0],[4])         , N:   46, mean:    -0.095737, 95% conf. int.:     0.025982
policy: UCB1(10000) MS([500.0,200.0],[2,2]) , N:   46, mean:     0.090729, 95% conf. int.:     0.021222


In [109]:
processDataByTreePolicyToRatio2("exp_MS_params.jld", datadir = "data_ijcai", bWideLabel = true, filter = [29593, 10521, 18272])

policy: UCB1(10000)                         , N:   46, mean:     0.067539, 95% conf. int.:     0.017969
policy: UCB1(10000) MS([500.0],[2])         , N:   46, mean:    -0.041980, 95% conf. int.:     0.008309
policy: UCB1(10000) MS([500.0],[4])         , N:   46, mean:    -0.025725, 95% conf. int.:     0.004338
policy: UCB1(10000) MS([500.0,200.0],[2,2]) , N:   46, mean:     0.000166, 95% conf. int.:     0.008011


## Enhanced Tree Policies with Splitting

In [12]:
processDataByScenario("exp_MS_01.jld", datadir = "data_ijcai_02")

Scenario: 26229
  policy: UCB1(100)       , mean:     35.79, RE:     0.07
  policy: UCB1(100) MS    , mean:     33.80, RE:     0.07

Scenario: 13157
  policy: UCB1(100)       , mean:  -1513.07, RE:     0.38
  policy: UCB1(100) MS    , mean:  -2411.60, RE:     0.33

Scenario: 10978
  policy: UCB1(100)       , mean:    230.90, RE:     0.00
  policy: UCB1(100) MS    , mean:    231.53, RE:     0.00

Scenario: 18337
  policy: UCB1(100)       , mean:     12.38, RE:     0.32
  policy: UCB1(100) MS    , mean:      9.21, RE:     0.42

Scenario: 28127
  policy: UCB1(100)       , mean:    255.14, RE:     0.00
  policy: UCB1(100) MS    , mean:    255.26, RE:     0.00

Scenario: 22805
  policy: UCB1(100)       , mean:    156.61, RE:     0.05
  policy: UCB1(100) MS    , mean:    141.77, RE:     0.05

Scenario: 22060
  policy: UCB1(100)       , mean:   -176.43, RE:     1.60
  policy: UCB1(100) MS    , mean:    -76.45, RE:     3.92

Scenario: 15358
  policy: UCB1(100)       , mean:    240.41, RE:     

In [14]:
processDataByTreePolicy("exp_MS_01.jld", datadir = "data_ijcai_02")

policy: UCB1(100)       , N: 9800, mean:     8.144030, 95% conf. int.:     0.005475
policy: UCB1(100) MS    , N: 9800, mean:     8.752133, 95% conf. int.:     0.005614


In [58]:
processDataByTreePolicyToRatio("exp_MS_01.jld", datadir = "data_ijcai_02")

policy: UCB1(100)       , N:   98, mean:    -2.847487, 95% conf. int.:     0.483187
policy: UCB1(100) MS    , N:   98, mean:     2.847487, 95% conf. int.:     0.483187


In [70]:
processDataByTreePolicyToRatio2("exp_MS_01.jld", datadir = "data_ijcai_02", filter = [13694])

policy: UCB1(100)       , N:   97, mean:     0.005307, 95% conf. int.:     0.001143
policy: UCB1(100) MS    , N:   97, mean:    -0.005307, 95% conf. int.:     0.001143
