In [1]:
include("utilities.jl");

In [2]:
# Observation mask: a 3×3 pattern of 0/1 entries that is handed to
# `observationKernel` further down. Only the centre entry is 0.
# NOTE(review): presumably a 1 marks a neighbouring cell that can be observed —
# confirm against utilities.jl.
V = [
    1 1 1
    1 0 1
    1 1 1
];

In [5]:
# Experiment configuration.
#   k          – sets the discount grid: k - 1 values of γ are generated
#   n_averages – number of mazes every γ is evaluated on
#   sidelength – maze size parameter; w and h are both set to it
#   ε          – spacing between consecutive discount factors
k = 10^3;
n_averages = 100;
sidelength = 5;
w = h = sidelength;
ε = 3*10^-5;
# Discount factors 1 - ε*k + j*ε for j = 1, …, k-1: an evenly spaced grid that
# ends just below 1. (An earlier variant used the uniform grid
# 1/k:1/k:(1 - 1/k) instead.)
γRange = map(j -> 1 - ε*k + j*ε, 1:(k-1));

In [6]:
# Read the mazes that should be solved.
# Each maze is a (2*sidelength+1) × (2*sidelength+1) grid of Float64 values.
# The CSV holds all mazes in one table, which is reshaped into a 3-d array
# whose first index selects the maze.
test = let raw = readdlm("mazes/mazes$sidelength.csv", ',', Float64),
    side = 2*sidelength + 1

    # Derive the number of stored mazes from the data instead of hard-coding it;
    # `reshape` still throws if the table does not hold whole mazes.
    reshape(raw, div(length(raw), side^2), side, side)
end

# Fail early with a clear message rather than with a BoundsError mid-loop.
size(test, 1) >= n_averages || throw(ArgumentError(
    "mazes/mazes$sidelength.csv holds $(size(test, 1)) mazes, " *
    "but n_averages = $n_averages were requested"))

# One matrix per maze; only the first n_averages mazes are used.
mazes = Matrix{Float64}[test[j, :, :] for j in 1:n_averages];

In [7]:
# Allocate memory for the computations with fixed discount.
# Both matrices have one row per discount factor in γRange and one column per
# maze; they are filled entry by entry in the solve loop.
timesROSA = zeros(length(γRange), n_averages);
rewardsROSA = zeros(length(γRange), n_averages);
# Solver status messages, collected in solve order. Typed instead of a bare
# `[]` (which is a Vector{Any}).
# NOTE(review): assumes `raw_status` is JuMP's, which returns a String — confirm.
statusROSA = String[];

In [8]:
# Solve every maze for every discount factor with ROSA and record the exact
# reward of the resulting policy, the solver time and the solver status.
# `@elapsed` returns the seconds the whole sweep took.
@elapsed for j in 1:n_averages
    # Problem data that does not depend on γ is built once per maze.
    M = mazes[j]
    states = listOfStates(M)
    # The goal is drawn from the default RNG, so it changes from run to run
    # unless the RNG is seeded beforehand.
    goal = rand(states)
    # NOTE(review): `A` is not defined in this part of the notebook —
    # presumably the action set provided by utilities.jl; confirm.
    α = transitionKernel(M, A, goal)
    β = observationKernel(M, V)
    r = instReward(M, A, goal)
    μ = initialDistribution(M)
    for i in eachindex(γRange)
        γ = γRange[i]
        # ROSA returns a solution object together with the optimisation model,
        # which is queried for timing and status below.
        ηROSA, modelROSA = ROSA(α, β, r, μ, γ, 0)
        πROSA = observationPolicy(ηROSA, β)
        rewardsROSA[i, j] = RExact(πROSA, α, β, γ, μ, r)
        timesROSA[i, j] = solve_time(modelROSA)
        # Append in place: avoids recopying the whole vector on every solve
        # and does not rebind the global from inside the loop.
        push!(statusROSA, raw_status(modelROSA))
    end
end


******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************



73.854297661

In [27]:
# Store rewards, solve times and solver statuses as comma-separated files
# under data/.
for (path, data) in (
    ("data/ROSASizeFixedRewards.csv", rewardsROSA),
    ("data/ROSASizeFixedTimes.csv", timesROSA),
    ("data/ROSASizeFixedStatus.csv", statusROSA),
)
    writedlm(path, data, ',')
end