In [None]:
using Pkg
# Activate the parent directory's project only when both of its environment
# files are present, and select the Python executable name through ENV["PYTHON"].
if all(isfile, ("../Project.toml", "../Manifest.toml"))
    Pkg.activate("..")
    ENV["PYTHON"] = "python3"
end
 
#Pkg.build("PyCall")

In [None]:
using JuliaProbo

In [None]:
# Build the puddle world used by the cells below.
# xlim / ylim are passed to PuddleWorld as its two range arguments.
xlim = [-5.0, 5.0]
ylim = [-5.0, 5.0]
world = PuddleWorld(xlim, ylim)
# Register two puddles.
# NOTE(review): the two vectors look like opposite corners of a rectangle and
# 0.1 like a depth value — confirm against the Puddle constructor in JuliaProbo.
push!(world, Puddle([-2.0, 0.0], [0.0, 2.0], 0.1))
push!(world, Puddle([-0.5, -2.0], [2.5, 1.0], 0.1))

In [None]:
"""
    out_correction(pe::PolicyEvaluator, index::Vector{Int64})

Wrap the third component of `index` into the 1-based range
`1:pe.index_nums[3]` and return the corrected index as the tuple
`(index[1], index[2], θ_ind)`.

The first two components are returned unchanged; no bounds handling is applied
to them here.
"""
function out_correction(pe::PolicyEvaluator, index::Vector{Int64})
    # `mod1` performs a 1-based wrap-around for any integer. The previous
    # `(i + n) % n` form used `rem`, which yields a negative result once
    # `i < -n`, so it only worked for offsets of less than one full turn.
    θ_ind = mod1(index[3], pe.index_nums[3])
    return (index[1], index[2], θ_ind,)
end

"""
    action_value(pe, index, value_function)

Return the expected value of applying the action stored in `pe.policy_` at
`index`, using `value_function` for the values of the successor cells.

For every `(offset, probability)` entry registered in
`pe.state_transition_probs` under `(v, ω, index[3])`, the successor index is
`index .+ offset` passed through `out_correction`. Its contribution is the
successor's value plus a step reward of
`-pe.dt * pe.depth[x, y] * pe.puddle_coeff - pe.dt`, weighted by the
probability.
"""
function action_value(pe::PolicyEvaluator, index::Vector{Int64}, value_function::AbstractArray{Float64, 3})
    v, ω = pe.policy_[index..., :]
    transitions = pe.state_transition_probs[(v, ω, index[3],)]
    total = 0.0
    for entry in transitions
        offset = entry[1]
        weight = entry[2]
        # Successor cell, with the third component wrapped by out_correction.
        next_index = out_correction(pe, index .+ offset)
        # Time cost plus a penalty that scales with the depth at the successor.
        step_reward = -pe.dt * pe.depth[next_index[1], next_index[2]] * pe.puddle_coeff - pe.dt
        total += (value_function[next_index...] + step_reward) * weight
    end
    return total
end

"""
    policy_evaluation_sweep(pe::PolicyEvaluator)

Run one policy-evaluation sweep over every index in `pe.indices`, updating
`pe.value_function_` in place.

All updates read from a copy of the value function taken before the sweep
starts, so cells updated earlier in the sweep do not influence later ones.
Cells whose entry in `pe.final_state_flags_` is not `0.0` are left untouched.
"""
function policy_evaluation_sweep(pe::PolicyEvaluator)
    flags = pe.final_state_flags_
    # Snapshot of the values before this sweep; every update reads from it.
    snapshot = copy(pe.value_function_)
    for idx in pe.indices
        # Skip cells flagged as final states.
        flags[idx...] == 0.0 || continue
        pe.value_function_[idx...] = action_value(pe, [idx...], snapshot)
    end
end

In [None]:
import Seaborn, Images, PyPlot
import Plots: @animate, gif

# Set up a fresh policy evaluator for the puddle world.
sampling_num = 10
pe = PolicyEvaluator([0.1, 0.1, pi/20], Goal(-3.0, -3.0), dt=0.1);
init_value(pe)
init_policy(pe)
init_state_transition_probs(pe, 0.1, sampling_num)
init_depth(pe, world, sampling_num)
sweep_num = 300
file_names = String[]

# Before each sweep, save a heatmap of slice 20 along the third axis of the
# value function as a temporary PNG, then advance the evaluation by one sweep.
for i in 1:sweep_num
    Seaborn.heatmap(rotl90(pe.value_function_[:, :, 20]))
    fname = "ch10_policy_evaluation6_tmp$(i).png"
    Seaborn.savefig(fname)
    PyPlot.close("all")
    push!(file_names, fname)
    policy_evaluation_sweep(pe)
end

# Make sure the output directory exists so the long run above is not wasted
# on a missing-directory failure when the GIF is written.
mkpath("images")

# NOTE(review): `Plots.plot` needs the `Plots` module name in scope, while the
# import in this cell only names `@animate` and `gif` — confirm it is provided
# elsewhere (e.g. by JuliaProbo).
anim = try
    @animate for i in 1:sweep_num
        Plots.plot(Images.load(file_names[i]), axis=nothing)
    end
finally
    # Remove the temporary frames even if loading or plotting one of them fails.
    foreach(f -> rm(f, force=true), file_names)
end
gif(anim, "images/ch10_policy_evaluation6.gif", fps=10)

In [None]:
# Set up a fresh policy evaluator for the puddle world.
sampling_num = 10
pe = PolicyEvaluator([0.1, 0.1, pi/20], Goal(-3.0, -3.0), dt=0.1);
init_value(pe)
init_policy(pe)
init_state_transition_probs(pe, 0.1, sampling_num)
init_depth(pe, world, sampling_num)
sweep_num = 10
file_names = String[]

using PyPlot
using PyCall
using Seaborn
using Base64
# Use the `pyimport` function instead of the deprecated `@pyimport` macro.
# The macro raises "@pyimport: anim already defined" when `anim` is already
# bound to a non-module value, which is the case after the GIF cell assigns the
# Plots animation to `anim`; a plain assignment simply rebinds the name.
anim = pyimport("matplotlib.animation")
fig = figure("MyFigure",figsize=(5,5))

# Frame callback for FuncAnimation: redraw the heatmap of slice 20 along the
# third axis of the current value function, then advance the evaluation by one
# sweep. The frame number `i` is not used.
function animate(i)
    PyPlot.clf()
    p = Seaborn.heatmap(rotl90(pe.value_function_[:, :, 20]))
    policy_evaluation_sweep(pe)
    return p
end

# Make sure the output directory exists before the writer tries to save into it.
mkpath("images")
@time myanim = anim.FuncAnimation(fig, animate, frames=sweep_num, interval=100);
println("Done.")
# Dot access replaces the deprecated `myanim[:save]` attribute lookup.
@time myanim.save("images/ch10_policy_evaluation6.mp4");
PyPlot.close("all")
println("Saved.")
# https://genkuroki.github.io/documents/Jupyter/20170624%20Examples%20of%20animations%20in%20Julia%20by%20PyPlot%20and%20matplotlib.animation.html
"""
    html_video(filename)

Read the file at `filename`, Base64-encode its bytes, and return an HTML
`<video controls>` tag string whose `src` is a `data:video/x-m4v;base64,...`
URI containing the encoded file.
"""
function html_video(filename)
    payload = base64encode(read(filename))
    return """<video controls src="data:video/x-m4v;base64,$payload">"""
end
# Show the saved MP4 in the output as HTML, using the <video> tag string built
# by html_video.
display("text/html", html_video("images/ch10_policy_evaluation6.mp4"));