In [1]:
# Include all the utilities
include("utilities.jl");

In [2]:
# Read the mazes that should be solved.
# For every size index i the file "mazes/mazes$i.csv" is read as a Float64
# matrix and reshaped into a stack of (2i+1)×(2i+1) mazes; the first
# `n_averages` of them are stored in `mazes[i, :]`.
max_size = 10
n_averages = 100

mazes = [zeros(2*i+1, 2*i+1) for i in 1:max_size, j in 1:n_averages];

for i in 1:max_size
    side = 2*i + 1
    raw = readdlm("mazes/mazes$i.csv", ',', Float64)
    # Let reshape infer how many mazes the file holds instead of hard-coding
    # 100, so the file contents and `n_averages` can change independently.
    stack = reshape(raw, :, side, side)
    # Fail early with a clear message rather than a BoundsError mid-loop.
    size(stack, 1) >= n_averages ||
        error("mazes/mazes$i.csv holds $(size(stack, 1)) mazes, need $n_averages")
    for j in 1:n_averages
        mazes[i,j] = stack[j,:,:]
    end
end

In [3]:
# Result containers for the fixed-discount experiment.
# The two matrices are indexed [maze size, maze instance]; the status list
# starts empty and is extended as results are recorded.
timesROSA = fill(0.0, max_size, n_averages);
rewardsROSA = fill(0.0, max_size, n_averages);
statusROSA = Any[];

In [4]:
# Define the visibility / observability mask handed to `observationKernel`.
# It is a 3×3 matrix of ones with a zero in the centre.
# NOTE(review): presumably the eight neighbouring cells are observed and the
# agent's own cell is not — confirm against observationKernel in utilities.jl.
V = [1 1 1; 1 0 1; 1 1 1];

In [5]:
# Fix the discount factor γ; the same value is passed to both `ROSA` and
# `RExact` in the computation loop.
γ = 0.9999;

In [7]:
# Do the computations: for every maze size i and instance j, build the model
# around a randomly drawn goal state, run ROSA, and record the exact reward of
# the resulting observation policy, the solver time and the raw solver status.
# `@elapsed` reports the wall-clock seconds of the whole sweep.
@elapsed for i in 1:max_size
    for j in 1:n_averages
        M = mazes[i,j];
        states = listOfStates(M);
        # The goal is sampled without a fixed seed, so results differ between runs.
        goal = rand(states);
        # NOTE(review): `A` is not defined in the cells shown here; presumably it
        # is the action set provided by utilities.jl — confirm.
        α = transitionKernel(M,A,goal);
        β = observationKernel(M,V);
        r = instReward(M,A,goal);
        μ = initialDistribution(M);
        # Evaluate ROSA
        # NOTE(review): meaning of the trailing `0` argument is not visible here.
        ηROSA, modelROSA = ROSA(α, β, r, μ, γ, 0)
        πROSA = observationPolicy(ηROSA, β)
        rewardsROSA[i,j] = RExact(πROSA, α, β, γ, μ, r)
        timesROSA[i,j] = solve_time(modelROSA)
        # Append in place: unlike `statusROSA = vcat(statusROSA, ...)` this does
        # not rebind the global inside the loop (which fails under hard scope
        # when run as a script) and does not copy the whole vector every time.
        push!(statusROSA, raw_status(modelROSA))
    end
    println("ROSA time for size $i: ", sum(timesROSA[i,:]) / n_averages)
    println("ROSA reward for size $i: ", sum(rewardsROSA[i,:]) / n_averages)
end

ROSA time for size 1: 0.0033232855796813967
ROSA reward for size 1: 0.9999999999999999
ROSA time for size 2: 0.005515432357788086
ROSA reward for size 2: 2.128180734401456
ROSA time for size 3: 0.010333473682403565
ROSA reward for size 3: 2.5829818468288837
ROSA time for size 4: 0.018900439739227295
ROSA reward for size 4: 1.9728362907807757
ROSA time for size 5: 0.03471754789352417
ROSA reward for size 5: 1.5469674832441018
ROSA time for size 6: 0.061869676113128665
ROSA reward for size 6: 1.0499296650526062
ROSA time for size 7: 0.09794137239456177
ROSA reward for size 7: 0.8449706331635989
ROSA time for size 8: 0.1497805881500244
ROSA reward for size 8: 0.5791777066683114
ROSA time for size 9: 0.2162007713317871
ROSA reward for size 9: 0.5202625830921435
ROSA time for size 10: 0.30965180158615113
ROSA reward for size 10: 0.45805779200483343


119.397367823

In [48]:
# Persist the three result containers as comma-separated files under data/.
for (path, data) in (
    ("data/ROSADiscountFixedRewardsSmall.csv", rewardsROSA),
    ("data/ROSADiscountFixedTimesSmall.csv", timesROSA),
    ("data/ROSADiscountFixedStatusSmall.csv", statusROSA),
)
    writedlm(path, data, ',')
end