In [None]:
using Pkg
# Switch to the parent directory's environment only when both of its project
# files are present; otherwise keep whatever environment is already active.
if all(isfile, ("../Project.toml", "../Manifest.toml"))
    Pkg.activate("..")
    # NOTE(review): presumably read by PyCall when it is built — confirm.
    ENV["PYTHON"] = "python3"
end
 
#Pkg.build("PyCall")

In [None]:
using JuliaProbo

In [None]:
# Build the `world` object used by the later cells: a `PuddleWorld` constructed
# from the `xlim`/`ylim` pairs, with two `Puddle`s pushed into it.
# NOTE(review): `xlim`/`ylim` are presumably the world's x and y extents — confirm.
xlim = [-5.0, 5.0]
ylim = [-5.0, 5.0]
world = PuddleWorld(xlim, ylim)
# NOTE(review): the `Puddle` arguments look like two corner points followed by a
# depth value — confirm against the JuliaProbo definition.
push!(world, Puddle([-2.0, 0.0], [0.0, 2.0], 0.1))
push!(world, Puddle([-0.5, -2.0], [2.5, 1.0], 0.1))

In [None]:
"""
    value_iteration_sweep(dp::DynamicProgramming)

Run one value-iteration sweep over every index in `dp.indices`.

For each index whose entry in `dp.final_state_flags_` equals `0.0`, every action
in `dp.actions` is scored with `action_value` against a copy of the value
function taken at the start of the sweep. The highest score is written to
`dp.value_function_` and a copy of the corresponding action to `dp.policy_`.
Indices with any other flag value are left untouched.

Returns the largest absolute difference between a pre-sweep value and its
replacement (`0.0` if nothing was updated).
"""
function value_iteration_sweep(dp::DynamicProgramming)
    visit_order = dp.indices
    is_final = dp.final_state_flags_
    # Taken once, so every state in this sweep is scored against pre-sweep values
    # rather than against values already overwritten earlier in the same sweep.
    previous_values = copy(dp.value_function_)
    largest_change = 0.0
    for idx in visit_order
        is_final[idx...] == 0.0 || continue

        best_action, best_q = nothing, -1e100
        for candidate in dp.actions
            q = action_value(dp, candidate, [idx...], previous_values; out_penalty = true)
            # Strict comparison: on ties the first action encountered is kept.
            if q > best_q
                best_action, best_q = copy(candidate), q
            end
        end

        largest_change = max(abs(previous_values[idx...] - best_q), largest_change)
        dp.value_function_[idx...] = best_q
        dp.policy_[idx..., :] = best_action
    end
    return largest_change
end

In [None]:
import Seaborn, Images, PyPlot
import Plots: @animate, gif

# Create the dynamic-programming solver `dp` and run its initialisers in order.
# `sampling_num` is passed to both the transition-probability and the depth
# initialiser below.
sampling_num = 10
# NOTE(review): the first argument presumably gives the grid cell widths for
# (x, y, θ) — confirm against the JuliaProbo constructor.
dp = DynamicProgramming([0.1, 0.1, pi/20], Goal(-3.0, -3.0), dt=0.1);
init_value(dp)
init_policy(dp)
# NOTE(review): the literal 0.1 presumably is a time step matching `dt` above — confirm.
init_state_transition_probs(dp, 0.1, sampling_num)
# Uses the global `world`, so the cell that builds it must have been run first.
init_depth(dp, world, sampling_num)

In [None]:
# Repeat sweeps until the largest per-state change reported by
# `value_iteration_sweep` is at most 0.01, or 300 sweeps have been run.
# Δ starts huge so that the loop body always executes at least once.
Δ = 1e100
sweep_num = 0
while Δ > 0.01 && sweep_num < 300
    Δ = value_iteration_sweep(dp)
    sweep_num += 1
    # Progress log on sweeps 1, 6, 11, ...
    if (sweep_num-1) % 5 == 0
        println("$(sweep_num)th: $(Δ)")
    end
end
# Report the final sweep, unless the progress log above already printed exactly
# this line (which happens when the last sweep number is 1, 6, 11, ...).
if (sweep_num-1) % 5 != 0
    println("$(sweep_num)th: $(Δ)")
end

In [None]:
# Collapse the policy stored for each state into one scalar (the sum of its
# components) so that a single slice of it can be drawn as a heatmap.
p = zeros(Float64, dp.index_nums...)
foreach(dp.indices) do idx
    p[idx...] = sum(dp.policy_[idx..., :])
end

# Plot the slice at third index 20, rotated 90° counter-clockwise by `rotl90`.
Seaborn.heatmap(rotl90(view(p, :, :, 20)))

In [None]:
# Clear the current figure so the new heatmap is not drawn on top of earlier output.
PyPlot.clf()
# Plot the value-function slice at third index 20, rotated 90° counter-clockwise.
Seaborn.heatmap(rotl90(view(dp.value_function_, :, :, 20)))

In [None]:
# Re-run value iteration from a freshly initialised solver, saving one heatmap
# image per sweep, then stitch the images into an animated GIF.
using Images
# `import Plots: @animate, gif` binds only those two names, not the module name
# itself, so import the module for the qualified `Plots.plot` call below.
import Plots

sampling_num = 10
dp = DynamicProgramming([0.1, 0.1, pi/20], Goal(-3.0, -3.0), dt=0.1);
init_value(dp)
init_policy(dp)
init_state_transition_probs(dp, 0.1, sampling_num)
init_depth(dp, world, sampling_num)
# Paths of the temporary frame images, in the order they were written.
file_names = String[]

Δ = 1e100
sweep_num = 0
while Δ > 0.01 && sweep_num < 300
    # The frame is saved before the sweep, so frame k shows the value function
    # after k-1 sweeps (the first frame is the initial value function).
    Seaborn.heatmap(rotl90(dp.value_function_[:, :, 20]))
    fname = "ch10_dynamic_programming1_tmp$(sweep_num).png"
    Seaborn.savefig(fname)
    # Clear the figure so the next heatmap is not drawn over this one.
    PyPlot.clf()
    push!(file_names, fname)
    Δ = value_iteration_sweep(dp)
    sweep_num += 1
    if (sweep_num-1) % 5 == 0
        println("$(sweep_num)th: $(Δ)")
    end
end

# Turn each saved image into one animation frame and delete the temporary file
# as soon as it has been consumed.
anim = @animate for fname in file_names
    Plots.plot(Images.load(fname), axis=nothing)
    rm(fname, force=true)
end
# Make sure the output directory exists before the GIF is written into it.
mkpath("images")
gif(anim, "images/ch10_dp1_value_iteration.gif", fps=10)