diff --git a/GAN/MMD_GAN/mmd_gan_1d.jl b/GAN/MMD_GAN/mmd_gan_1d.jl
index 2fbcf75..ee1a3ef 100644
--- a/GAN/MMD_GAN/mmd_gan_1d.jl
+++ b/GAN/MMD_GAN/mmd_gan_1d.jl
@@ -38,9 +38,9 @@ end
     epochs::Int = 1000
     num_gen::Int = 1
     num_enc_dec::Int = 1
-    lr_enc::Float64 = 1.0e-10
-    lr_dec::Float64 = 1.0e-10
-    lr_gen::Float64 = 1.0e-10
+    lr_enc::Float64 = 1.0e-3
+    lr_dec::Float64 = 1.0e-3
+    lr_gen::Float64 = 1.0e-3
     lambda_AE::Float64 = 8.0
@@ -73,6 +73,8 @@ function train_mmd_gan_1d(enc, dec, gen, hparams::HyperParamsMMD1D)
     @showprogress for epoch in 1:(hparams.epochs)
         for _ in 1:(hparams.num_enc_dec)
             loss, grads = Flux.withgradient(enc, dec) do enc, dec
+                Flux.reset!(enc)
+                Flux.reset!(dec)
                 target = Float32.(rand(hparams.target_model, hparams.batch_size))
                 noise = Float32.(rand(hparams.noise_model, hparams.batch_size))
                 encoded_target = enc(target')
@@ -93,6 +95,7 @@ function train_mmd_gan_1d(enc, dec, gen, hparams::HyperParamsMMD1D)
         end
         for _ in 1:(hparams.num_gen)
             loss, grads = Flux.withgradient(gen) do gen
+                Flux.reset!(gen)
                 target = Float32.(rand(hparams.target_model, hparams.batch_size))
                 noise = Float32.(rand(hparams.noise_model, hparams.batch_size))
                 encoded_target = enc(target')
diff --git a/benchmarks/benchmark_multimodal.jl b/benchmarks/benchmark_multimodal.jl
index eb2ef6f..252a604 100644
--- a/benchmarks/benchmark_multimodal.jl
+++ b/benchmarks/benchmark_multimodal.jl
@@ -13,7 +13,7 @@ include("benchmark_utils.jl")
     dscr = Chain(
         Dense(1, 11), elu, Dense(11, 29), elu, Dense(29, 11), elu, Dense(11, 1, σ)
     )
-    target_model = MixtureModel([Normal(4.0f0, 2.0f0), Normal(-2.0f0, 1.0f0)])
+    target_model =
     hparams = HyperParamsVanillaGan(;
         data_size=100,
         batch_size=1,
@@ -29,7 +29,7 @@ include("benchmark_utils.jl")
     train_vanilla_gan(dscr, gen, hparams)

     hparams = HyperParams(;
-        samples=1000, K=100, epochs=100, η=1e-2, transform=noise_model
+        samples=1000, K=10, epochs=1000, η=1e-2, transform=noise_model
     )
     #hparams = AutoAdaptativeHyperParams(;
     #    max_k=20, samples=1200, epochs=10000, η=1e-3, transform=noise_model
@@ -39,6 +39,7 @@ include("benchmark_utils.jl")

     #save_gan_model(gen, dscr, hparams)

+    adaptative_block_learning_1(gen, loader, hparams)

     ksd = KSD(noise_model, target_model, n_samples, 18:0.1:28)
@@ -53,13 +54,13 @@ include("benchmark_utils.jl")
     #save_gan_model(gen, dscr, hparams)

     plot_global(
-        x -> -quantile.(target_model, cdf(noise_model, x)),
+        x -> quantile.(target_model, cdf(noise_model, x)),
         noise_model,
         target_model,
         gen,
         n_samples,
         (-3:0.1:3),
-        (5:0.2:15),
+        (:0.2:10),
     )

     #@test js_divergence(hist1.weights, hist2.weights)/hparams.samples < 0.03
diff --git a/benchmarks/benchmark_unimodal.jl b/benchmarks/benchmark_unimodal.jl
index 5261085..9b3c6cb 100644
--- a/benchmarks/benchmark_unimodal.jl
+++ b/benchmarks/benchmark_unimodal.jl
@@ -92,7 +92,7 @@ include("benchmark_utils.jl")
     dscr = Chain(
         Dense(1, 11), elu, Dense(11, 29), elu, Dense(29, 11), elu, Dense(11, 1, σ)
     )
-    target_model = Cauchy(23.0f0, 1.0f0)
+    target_model = MixtureModel([Normal(-10.0, 1.0), Uniform(-5.0,5.0), Pareto(3.0, 10.0)])
     hparams = HyperParamsVanillaGan(;
         data_size=100,
         batch_size=1,
@@ -108,7 +108,7 @@ include("benchmark_utils.jl")
     train_vanilla_gan(dscr, gen, hparams)

     hparams = AutoAdaptativeHyperParams(;
-        max_k=10, samples=1000, epochs=400, η=1e-2, transform=noise_model
+        max_k=10, samples=1000, epochs=1000, η=1e-2, transform=noise_model
     )
     train_set = Float32.(rand(target_model, hparams.samples))
     loader = Flux.DataLoader(train_set; batchsize=-1, shuffle=true, partial=false)
@@ -130,7 +130,7 @@ include("benchmark_utils.jl")
         gen,
         n_samples,
         (-3:0.1:3),
-        (18:0.1:55),
+        (-20:0.2:30),
     )
 end
@@ -313,9 +313,7 @@ end
     dscr = Chain(
         Dense(1, 11), elu, Dense(11, 29), elu, Dense(29, 11), elu, Dense(11, 1, σ)
     )
-    target_model = MixtureModel([
-        Normal(5.0f0, 2.0f0), Normal(-1.0f0, 1.0f0), Normal(-7.0f0, 0.4f0)
-    ])
+    target_model = Pareto(1.0f0, 2.0f0)

     hparams = HyperParamsWGAN(;
         noise_model=noise_model,
@@ -323,7 +321,7 @@ end
         data_size=100,
         batch_size=1,
         epochs=1e3,
-        n_critic=4,
+        n_critic=2,
         lr_dscr=1e-2,
         #lr_gen = 1.4e-2,
         lr_gen=1e-2,
@@ -331,11 +329,11 @@ end
     loss = train_wgan(dscr, gen, hparams)

-    hparams = HyperParams(; samples=100, K=10, epochs=2000, η=1e-3, noise_model)
+    hparams = HyperParams(; samples=100, K=10, epochs=1000, η=1e-3, noise_model)
     train_set = rand(target_model, hparams.samples)
     loader = Flux.DataLoader(train_set; batchsize=-1, shuffle=true, partial=false)

-    adaptative_block_learning(gen, loader, hparams)
+    auto_adaptative_block_learning(gen, loader, hparams)

     ksd = KSD(noise_model, target_model, n_samples, 20:0.1:25)
     mae = min(
@@ -347,6 +345,9 @@ end
         MSE(noise_model, x -> .-x .+ 23, n_sample),
     )

+    save_gan_model(gen, dscr, hparams)
+
+
     #@test js_divergence(hist1.weights, hist2.weights)/hparams.samples < 0.03
 end
@@ -567,6 +568,8 @@ end
     mse = MSE(
         noise_model, x -> quantile.(target_model, cdf(noise_model, x)), n_sample
     )
+
+    save_adaptative_model(gen, hparams)
 end

 @test_experiments "Uniform(-1,1) to Pareto(1,23)" begin
@@ -618,23 +621,34 @@ end
     dec = Chain(Dense(29, 11), elu, Dense(11, 1))
     gen = Chain(Dense(1, 7), elu, Dense(7, 13), elu, Dense(13, 7), elu, Dense(7, 1))
-    target_model = Normal(23.0f0, 1.0f0)
+    target_model = Normal(4.0f0, 2.0f0)

     hparams = HyperParamsMMD1D(;
         noise_model=noise_model,
         target_model=target_model,
-        data_size=1,
+        data_size=100,
         batch_size=1,
         num_gen=1,
         num_enc_dec=5,
-        epochs=1e5,
-        lr_dec=1.0e-2,
-        lr_enc=1.0e-2,
-        lr_gen=1.0e-2,
+        epochs=1000000,
+        lr_dec=1.0e-3,
+        lr_enc=1.0e-3,
+        lr_gen=1.0e-3,
     )

     train_mmd_gan_1d(enc, dec, gen, hparams)

+    plot_global(
+        x -> quantile.(target_model, cdf(noise_model, x)),
+        noise_model,
+        target_model,
+        gen,
+        n_samples,
+        (-3:0.1:3),
+        (-5:0.2:10),
+    )
+
+
     hparams = HyperParams(; samples=100, K=10, epochs=2000, η=1e-3, noise_model)
     train_set = rand(target_model, hparams.samples)
     loader = Flux.DataLoader(train_set; batchsize=-1, shuffle=true, partial=false)
@@ -651,6 +665,16 @@ end
         MSE(noise_model, x -> .-x .+ 23, n_sample),
     )

+    plot_global(
+        x -> quantile.(target_model, cdf(noise_model, x)),
+        noise_model,
+        target_model,
+        gen,
+        n_samples,
+        (-3:0.1:3),
+        (-5:0.2:10),
+    )
+
     #@test js_divergence(hist1.weights, hist2.weights)/hparams.samples < 0.03
 end
diff --git a/benchmarks/benchmark_utils.jl b/benchmarks/benchmark_utils.jl
index b1030b1..26cdd0f 100644
--- a/benchmarks/benchmark_utils.jl
+++ b/benchmarks/benchmark_utils.jl
@@ -88,7 +88,7 @@ function plot_transformation(real_transform, gen, range)
         linecolor=:redsblues,
     )
     y = gen(range')
-    return plot!(range, vec(y); legend=:bottomright, label="neural network", linecolor=get(ColorSchemes.rainbow, 0.2), ylims=(-20,20))
+    return plot!(range, vec(y); legend=:bottomright, label="neural network", linecolor=get(ColorSchemes.rainbow, 0.2), ylims=(-10,10))
 end

 function plot_global(
@@ -149,7 +149,7 @@ function save_gan_model(gen, dscr, hparams)
     function getName(hparams)
         gan = gans[typeof(hparams)]
         lr_gen = hparams.lr_gen
-        dscr_steps = hparams.dscr_steps
+        dscr_steps = hparams.n_critic
         noise_model = replace(strip(string(hparams.noise_model)), "\n" => "",
             r"(K = .*)" => "", r"components\[.*\] " => "", r"prior = " => "", "μ=" => "", "σ=" => "", r"\{Float.*\}" => "")
         target_model = replace(strip(string(hparams.target_model)), "\n" => "",
             r"(K = .*)" => "", r"components\[.*\] " => "", r"prior = " => "", "μ=" => "", "σ=" => "", r"\{Float.*\}" => "")
         basename = "$gan-$noise_model-$target_model-lr_gen=$lr_gen-dscr_steps=$dscr_steps"
diff --git a/examples/time_series_predictions/model.jl b/examples/time_series_predictions/model.jl
new file mode 100644
index 0000000..4115903
--- /dev/null
+++ b/examples/time_series_predictions/model.jl
@@ -0,0 +1,374 @@
+using Flux
+using Random
+using Statistics
+
+using AdaptativeBlockLearning
+using Distributions
+using DataFrames
+using CSV
+using Plots
+
+include("../../benchmarks/benchmark_utils.jl")
+include("ts_utils.jl")
+
+function ts_covariates_mse_learning(nn_model, Xₜ, Xₜ₊₁, hparams)
+    losses = []
+    optim = Flux.setup(Flux.Adam(hparams.η), nn_model)
+    @showprogress for epoch in (1:(hparams.epochs))
+        Flux.reset!(nn_model)
+        loss, grads = Flux.withgradient(nn_model) do nn
+            nn(Xₜ[1])
+            sum(Flux.Losses.mse.([nn(x)[1] for x in Xₜ[1:end]], Xₜ₊₁[1:end]))
+        end
+        Flux.update!(optim, nn_model, grads[1])
+        push!(losses, loss)
+    end
+    return losses
+end
+
+function ts_mse_learning(nn_model, Xₜ, Xₜ₊₁, hparams)
+    losses = []
+    optim = Flux.setup(Flux.Adam(hparams.η), nn_model)
+    @showprogress for epoch in (1:(hparams.epochs))
+        Flux.reset!(nn_model)
+        loss, grads = Flux.withgradient(nn_model) do nn
+            nn([Xₜ[1]]')
+            sum(Flux.Losses.mse.([nn([x]')[1] for x in Xₜ[1:(end - 1)]], Xₜ₊₁[1:end]))
+        end
+        Flux.update!(optim, nn_model, grads[1])
+        push!(losses, loss)
+    end
+    return losses
+end
+
+@test_experiments "testing AutoRegressive Model" begin
+    ar_hparams = ARParams(;
+        ϕ=[0.7f0, 0.2f0, 0.1f0],
+        x₁=rand(Normal(0.0f0, 0.5f0)),
+        proclen=10000,
+        noise=Normal(0.0f0, 0.5f0),
+    )
+    hparams = HyperParamsTS(; seed=1234, η=1e-3, epochs=1000, window_size=200, K=5)
+
+    nn_model = Chain(RNN(1 => 32, relu), RNN(32 => 32, relu), Dense(32 => 1, identity))
+
+    loaderXtrain, loaderYtrain, loaderXtest, loaderYtest = generate_batch_train_test_data(
+        hparams, ar_hparams
+    )
+
+    loss = ts_adaptative_block_learning(nn_model, loaderXtrain, loaderYtrain, hparams)
+
+    plot_ts(nn_model, loaderXtrain, loaderYtrain, hparams)
+
+    loss = ts_mse_learning(
+        nn_model, collect(loaderXtrain)[1], collect(loaderYtrain)[1], hparams
+    )
+
+    @gif for i in 1:100
+        histogram(
+            get_density(nn_model, collect(loaderXtrain)[1], i, 1000);
+            bins=(-25:0.2:20),
+            normalize=:pdf,
+            label="t=$i",
+        )
+        println("$i")
+    end every 2
+
+    loss = get_stats(nn_model, loaderXtrain, loaderYtrain, hparams)
+
+    bar(get_window_of_Aₖ(Normal(0.0f0, 1.0f0), nn_model, collect(loaderXtrain)[1], 2))
+
+    res = []
+    for i in (1:(hparams.epochs - 1))
+        Flux.reset!(nn_model)
+        for data in collect(loaderXtrain)[i]
+            nn_model.([[data]])
+        end
+
+        prediction = Vector{Float32}()
+        for data in collect(loaderXtest)[i]
+            y = nn_model.([[data]])
+            append!(prediction, y[1])
+        end
+        r, _ = yule_walker(Float64.(prediction); order=3)
+        push!(res, r)
+    end
+
+    plot(prediction; seriestype=:scatter)
+    plot!(Float32.(collect(loaderXtest)[1]); seriestype=:scatter)
+
+    ND(Float32.(collect(loaderXtest)[1])[1:200], prediction[1:200])
+
+    RMSE(Float32.(collect(loaderXtest)[1])[1:200], prediction[1:200])
+
+    yule_walker(Float64.(collect(loaderYtest)[2]); order=3)
+    yule_walker(Float64.(prediction); order=3)
+
+    y = collect(loaderYtest)[1]
+    Flux.reset!(nn_model)
+    nn_model.([collect(loaderXtest)[1]'])
+    collect(loaderYtrain)[1]
+
+    get_watson_durbin_test(y, ŷ)
+end
+
+@test_experiments "testing al-pv-2006" begin
+    #=
+    https://www.nrel.gov/grid/solar-power-data.html
+    =#
+
+    csv1 = "examples/time_series_predictions/data/al-pv-2006/Actual_30.45_-88.25_2006_UPV_70MW_5_Min.csv"
+    csv2 = "examples/time_series_predictions/data/al-pv-2006/Actual_30.55_-87.55_2006_UPV_80MW_5_Min.csv"
+    csv3 = "examples/time_series_predictions/data/al-pv-2006/Actual_30.55_-87.75_2006_DPV_36MW_5_Min.csv"
+    csv4 = "examples/time_series_predictions/data/al-pv-2006/Actual_30.55_-88.15_2006_DPV_38MW_5_Min.csv"
+    csv5 = "examples/time_series_predictions/data/al-pv-2006/Actual_30.55_-88.25_2006_DPV_38MW_5_Min.csv"
+
+    df1 = CSV.File(
+        csv1; delim=',', header=true, decimal='.', types=Dict("Power(MW)" => Float32)
+    )
+
+    df2 = CSV.File(
+        csv2; delim=',', header=true, decimal='.', types=Dict("Power(MW)" => Float32)
+    )
+
+    df3 = CSV.File(
+        csv3; delim=',', header=true, decimal='.', types=Dict("Power(MW)" => Float32)
+    )
+
+    df4 = CSV.File(
+        csv4; delim=',', header=true, decimal='.', types=Dict("Power(MW)" => Float32)
+    )
+
+    df5 = CSV.File(
+        csv5; delim=',', header=true, decimal='.', types=Dict("Power(MW)" => Float32)
+    )
+
+    hparams = HyperParamsTS(; seed=1234, η=1e-3, epochs=100, window_size=1000, K=5)
+
+    nn_model = Chain(RNN(5 => 32, relu), RNN(32 => 32, relu), Dense(32 => 1, identity))
+
+    num_training_data = 10000
+
+    loaderXtrain = Flux.DataLoader(
+        [
+            [
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+            ] for i in 1:length(df1["Power(MW)"][1:num_training_data])
+        ];
+        batchsize=round(Int, num_training_data),
+        shuffle=false,
+        partial=false,
+    )
+
+    loaderYtrain = Flux.DataLoader(
+        [
+            [
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+            ] for i in 1:length(df1["Power(MW)"][2:(num_training_data + 1)])
+        ];
+        batchsize=round(Int, num_training_data),
+        shuffle=false,
+        partial=false,
+    )
+
+    num_test_data = 200
+    loaderXtest = Flux.DataLoader(
+        [
+            [
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+                df1["Power(MW)"][i],
+            ] for i in
+            1:length(
+                df1["Power(MW)"][num_training_data:(num_training_data + num_test_data)]
+            )
+        ];
+        batchsize=round(Int, num_training_data),
+        shuffle=false,
+        partial=false,
+    )
+
+    loss = ts_covariates_adaptative_block_learning(
+        nn_model, collect(loaderXtrain)[1], collect(loaderYtrain)[1], hparams
+    )
+
+    Flux.reset!(nn_model)
+    for data in collect(loaderXtrain)[1]
+        nn_model.([data])
+    end
+
+    prediction = Vector{Float32}()
+    for data in collect(loaderXtest)[1]
+        y = nn_model.([data])
+        append!(prediction, y[1])
+    end
+end
+
+@test_experiments "testing 30 years european wind generation" begin
+    #=
+    https://www.nrel.gov/grid/solar-power-data.html
+    =#
+
+    csv1 = "examples/time_series_predictions/data/30-years-of-european-wind-generation/TS.CF.N2.30yr.csv"
+
+    df1 = CSV.File(
+        csv1;
+        delim=',',
+        header=true,
+        decimal='.',
+        stripwhitespace=true,
+        types=Dict("AT11" => Float32),
+    )
+
+    hparams = HyperParamsTS(; seed=1234, η=1e-3, epochs=100, window_size=1000, K=5)
+
+    nn_model = Chain(RNN(5 => 32, relu), RNN(32 => 32, relu), Dense(32 => 1, identity))
+
+    num_training_data = 10000
+
+    loaderXtrain = Flux.DataLoader(
+        map(
+            x -> Float32.(x),
+            cat(
+                [
+                    [df1.FR43[i], df1.FR82[i], df1.FR71[i], df1.FR72[i], df1.FR26[i]] for
+                    i in 1:length(df1[1:num_training_data])
+                ];
+                dims=1,
+            ),
+        );
+        batchsize=round(Int, num_training_data),
+        shuffle=false,
+        partial=false,
+    )
+
+    loaderYtrain = Flux.DataLoader(
+        map(
+            x -> Float32.(x),
+            cat(
+                [
+                    [df1.FR43[i], df1.FR82[i], df1.FR71[i], df1.FR72[i], df1.FR26[i]] for
+                    i in 1:length(df1[2:(num_training_data + 1)])
+                ];
+                dims=1,
+            ),
+        );
+        batchsize=round(Int, num_training_data),
+        shuffle=false,
+        partial=false,
+    )
+
+    loss = ts_covariates_adaptative_block_learning(
+        nn_model, collect(loaderXtrain)[1], collect(loaderYtrain)[1], hparams
+    )
+
+    Flux.reset!(nn_model)
+    for data in collect(loaderXtrain)[1]
+        nn_model.([data])
+    end
+
+    prediction = Vector{Float32}()
+    for data in collect(loaderXtest)[1]
+        y = nn_model.([data])
+        append!(prediction, y[1])
+    end
+
+    real_seriel = [x[1] for x in collect(loaderXtest)[1]]
+end
+
+@test_experiments "testing LD2011_2014" begin
+    csv_file_path = "/AdaptativeBlockLearning/examples/time_series_predictions"
+
+    df = CSV.File(
+        csv_file_path;
+        delim=';',
+        header=true,
+        decimal=',',
+        types=Dict(
+            "MT_331" => Float32,
+            "MT_332" => Float32,
+            "MT_333" => Float32,
+            "MT_334" => Float32,
+            "MT_335" => Float32,
+            "MT_336" => Float32,
+            "MT_338" => Float32,
+        ),
+    )
+
+    hparams = HyperParamsTS(; seed=1234, η=1e-3, epochs=100, window_size=100, K=5)
+
+    nn_model = Chain(RNN(5 => 32, relu), RNN(32 => 32, relu), Dense(32 => 1, identity))
+
+    loaderXtrain = Flux.DataLoader(
+        [
+            [df.MT_333[i], df.MT_334[i], df.MT_335[i], df.MT_336[i], df.MT_338[i]] for
+            i in 1:length(df.MT_335[1:40000])
+        ];
+        batchsize=round(Int, 40000),
+        shuffle=false,
+        partial=false,
+    )
+
+    loaderYtrain = Flux.DataLoader(
+        [
+            [df.MT_333[i], df.MT_334[i], df.MT_335[i], df.MT_336[i], df.MT_338[i]] for
+            i in 1:length(df.MT_335[1:40000])
+        ];
+        batchsize=round(Int, 200),
+        shuffle=false,
+        partial=false,
+    )
+
+    loaderXtest = Flux.DataLoader(
+        [
+            [
+                df.MT_333[40000 + i],
+                df.MT_334[40000 + i],
+                df.MT_335[40000 + i],
+                df.MT_336[40000 + i],
+                df.MT_338[40000 + i],
+            ] for i in 1:length(df.MT_335[40000:40200])
+        ];
+        batchsize=round(Int, 40000),
+        shuffle=false,
+        partial=false,
+    )
+
+    loss = ts_covariates_adaptative_block_learning(
+        nn_model, collect(loaderXtrain)[1], collect(loaderYtrain)[1], hparams
+    )
+
+    res = []
+    Flux.reset!(nn_model)
+    for data in collect(loaderXtrain)[1]
+        y = nn_model.([data])
+        append!(prediction, y[1])
+    end
+
+    prediction = Vector{Float32}()
+    for data in collect(loaderXtest)[1]
+        y = nn_model.([data])
+        append!(prediction, y[1])
+    end
+
+    prediction = Vector{Float32}()
+    data = collect(loaderXtest)[1]
+    y = nn_model(data[1])
+    append!(prediction, y[1])
+    for i in 1:200
+        y = nn_model(vcat(y, data[i][2:end]))
+        append!(prediction, y[1])
+    end
+    r, _ = yule_walker(Float64.(prediction); order=3)
+    push!(res, r)
+end
diff --git a/src/AdaptativeBlockLearning.jl b/src/AdaptativeBlockLearning.jl
index 0d8fac8..5f9f150 100644
--- a/src/AdaptativeBlockLearning.jl
+++ b/src/AdaptativeBlockLearning.jl
@@ -29,6 +29,12 @@ export _sigmoid,
     AutoAdaptativeHyperParams,
     auto_adaptative_block_learning,
     adaptative_block_learning_1,
-    auto_adaptative_block_learning_1
+    auto_adaptative_block_learning_1,
+    HyperParamsTS,
+    ts_adaptative_block_learning,
+    ts_covariates_adaptative_block_learning,
+    get_proxy_histogram_loss_ts,
+    get_window_of_Aₖ_ts,
+    get_density

 end
diff --git a/src/CustomLossFunction.jl b/src/CustomLossFunction.jl
index 6e5dcc0..12a175c 100644
--- a/src/CustomLossFunction.jl
+++ b/src/CustomLossFunction.jl
@@ -4,14 +4,13 @@
 Sigmoid function centered at `y`.
 """
 function _sigmoid(ŷ::Matrix{T}, y::T) where {T<:AbstractFloat}
-    return sigmoid_fast.((y .- ŷ) .* 20)
+    return sigmoid_fast.((y .- ŷ) .* 10.0f0)
 end;

 function _leaky_relu(ŷ::Matrix{T}, y::T) where {T<:AbstractFloat}
-    return min.(0.001 .* (y .- ŷ) .+ 1., leakyrelu.((y .- ŷ) .* 10, 0.001))
+    return min.(0.001 .* (y .- ŷ) .+ 1.0, leakyrelu.((y .- ŷ) .* 10, 0.001))
 end;
-
 """
     ψₘ(y, m)

@@ -140,7 +139,7 @@ function adaptative_block_learning(nn_model, data, hparams)
     @showprogress for epoch in 1:(hparams.epochs)
         loss, grads = Flux.withgradient(nn_model) do nn
             aₖ = zeros(hparams.K + 1)
-            for i in 1:hparams.samples
+            for i in 1:(hparams.samples)
                 x = rand(hparams.transform, hparams.K)
                 yₖ = nn(x')
                 aₖ += generate_aₖ(yₖ, data.data[i])
@@ -161,7 +160,7 @@ function adaptative_block_learning_1(nn_model, loader, hparams)
     @showprogress for data in loader
         loss, grads = Flux.withgradient(nn_model) do nn
             aₖ = zeros(hparams.K + 1)
-            for i in 1:hparams.samples
+            for i in 1:(hparams.samples)
                 x = rand(hparams.transform, hparams.K)
                 yₖ = nn(x')
                 aₖ += generate_aₖ(yₖ, data[i])
@@ -203,9 +202,7 @@ end;
 Generate a window of the rv's `Aₖ` for a given model and target function.
 """
 function get_window_of_Aₖ(transform, model, data, K::Int64)
-    window = count.([
-        model(rand(transform, K)') .< d for d in data
-    ])
+    window = count.([model(rand(transform, K)') .< d for d in data])
     return [count(x -> x == i, window) for i in 0:K]
 end;

@@ -221,7 +218,7 @@ end;
 function get_better_K(nn_model, data, min_K, hparams)
     K = hparams.max_k
-    for k in min_K:hparams.max_k
+    for k in min_K:(hparams.max_k)
         if !convergence_to_uniform(get_window_of_Aₖ(hparams.transform, nn_model, data, k))
             K = k
             break
@@ -261,7 +258,7 @@ function auto_adaptative_block_learning(nn_model, data, hparams)
         end
         loss, grads = Flux.withgradient(nn_model) do nn
             aₖ = zeros(K + 1)
-            for i in 1:hparams.samples
+            for i in 1:(hparams.samples)
                 x = rand(hparams.transform, K)
                 yₖ = nn(x')
                 aₖ += generate_aₖ(yₖ, data.data[i])
@@ -290,7 +287,7 @@ function auto_adaptative_block_learning_1(nn_model, loader, hparams)
         end
         loss, grads = Flux.withgradient(nn_model) do nn
             aₖ = zeros(K + 1)
-            for i in 1:hparams.samples
+            for i in 1:(hparams.samples)
                 x = rand(hparams.transform, K)
                 yₖ = nn(x')
                 aₖ += generate_aₖ(yₖ, data[i])
@@ -302,3 +299,144 @@ function auto_adaptative_block_learning_1(nn_model, loader, hparams)
     end
     return losses
 end;
+
+# Hyperparameters for the method `ts_adaptative_block_learning`
+
+"""
+    HyperParamsTS
+
+Hyperparameters for the method `ts_adaptative_block_learning`.
+"""
+Base.@kwdef mutable struct HyperParamsTS
+    seed::Int = 72                      # Random seed
+    dev = cpu                           # Device: cpu or gpu
+    η::Float64 = 1e-3                   # Learning rate
+    epochs::Int = 100                   # Number of epochs
+    noise_model = Normal(0.0f0, 1.0f0)  # Noise distribution used to draw the K simulated observations
+    window_size = 100                   # Number of time steps used per gradient update (window)
+    K = 10                              # Number of simulated observations
+end
+
+# Train and output the model according to the chosen hyperparameters `hparams`
+
+"""
+    ts_adaptative_block_learning(nn_model, Xₜ, Xₜ₊₁, hparams)
+
+Custom loss function and training loop for the model `nn_model` on time series data `Xₜ` and `Xₜ₊₁`.
+"""
+function ts_adaptative_block_learning(nn_model, Xₜ, Xₜ₊₁, hparams)
+    losses = []
+    optim = Flux.setup(Flux.Adam(hparams.η), nn_model)
+    @showprogress for (batch_Xₜ, batch_Xₜ₊₁) in zip(Xₜ, Xₜ₊₁)
+        j = 0
+        Flux.reset!(nn_model)
+        nn_model([batch_Xₜ[1]]) # Warm up recurrent model on first observation
+        loss, grads = Flux.withgradient(nn_model) do nn
+            aₖ = zeros(hparams.K + 1)
+            for i in (1:(hparams.window_size))
+                xₖ = rand(hparams.noise_model, hparams.K)
+                nn_cp = deepcopy(nn)
+                yₖ = nn_cp(xₖ')
+                aₖ += generate_aₖ(yₖ, batch_Xₜ₊₁[j + i])
+                nn([batch_Xₜ[j + i]])
+            end
+            scalar_diff(aₖ ./ sum(aₖ))
+        end
+        Flux.update!(optim, nn_model, grads[1])
+        j += hparams.window_size
+        push!(losses, loss)
+    end
+    return losses
+end
+
+"""
+    ts_covariates_adaptative_block_learning(nn_model, Xₜ, Xₜ₊₁, hparams)
+
+Custom loss function and training loop for the model `nn_model` on time series data `Xₜ` and `Xₜ₊₁`,
+where `Xₜ` is a vector of vectors: each inner vector [x₁, x₂, ..., xₙ] holds the value to predict,
+x₁, together with its covariates [x₂, ..., xₙ], and `Xₜ₊₁` is the vector of next values.
+"""
+function ts_covariates_adaptative_block_learning(nn_model, Xₜ, Xₜ₊₁, hparams)
+    losses = []
+    optim = Flux.setup(Flux.Adam(hparams.η), nn_model)
+    @showprogress for _ in 1:(hparams.epochs)
+        j = 0
+        Flux.reset!(nn_model)
+        nn_model(Xₜ[1])
+        loss, grads = Flux.withgradient(nn_model) do nn
+            aₖ = zeros(hparams.K + 1)
+            for i in (1:(hparams.window_size))
+                xₖ = rand(hparams.noise_model, hparams.K)
+                nn_cp = deepcopy(nn)
+                yₖ = nn_cp.([vcat(x, Xₜ[j + i][2:end]) for x in xₖ])
+                aₖ += generate_aₖ(cat(yₖ...; dims=2), Xₜ₊₁[j + i][1])
+                nn(Xₜ[j + i])
+            end
+            scalar_diff(aₖ ./ sum(aₖ))
+        end
+        Flux.update!(optim, nn_model, grads[1])
+        j += hparams.window_size
+        push!(losses, loss)
+    end
+    return losses
+end
+
+"""
+    get_proxy_histogram_loss_ts(nn_model, data_xₜ, data_xₜ₊₁, hparams)
+
+Get the loss of the model `nn_model` on the data `data_xₜ` and `data_xₜ₊₁` using the
+proxy histogram method.
+"""
+function get_proxy_histogram_loss_ts(nn_model, data_xₜ, data_xₜ₊₁, hparams)
+    losses = []
+    @showprogress for (batch_xₜ, batch_xₜ₊₁) in zip(data_xₜ, data_xₜ₊₁)
+        j = 0
+        Flux.reset!(nn_model)
+        nn_model([batch_xₜ[1]])
+        aₖ = zeros(hparams.K + 1)
+        for i in 1:(hparams.window_size)
+            xₖ = rand(hparams.noise_model, hparams.K)
+            nn_cp = deepcopy(nn_model)
+            yₖ = nn_cp(xₖ')
+            aₖ += generate_aₖ(yₖ, batch_xₜ₊₁[j + i])
+            nn_model([batch_xₜ[j + i]])
+        end
+        j += hparams.window_size
+        push!(losses, scalar_diff(aₖ ./ sum(aₖ)))
+    end
+    return losses
+end
+
+function get_window_of_Aₖ_ts(transform, model, data, K::Int64)
+    Flux.reset!(model)
+    res = []
+    for d in data
+        xₖ = rand(transform, K)
+        model_cp = deepcopy(model)
+        yₖ = model_cp(xₖ')
+        push!(res, yₖ .< d)
+    end
+    return [count(x -> x == i, count.(res)) for i in 0:K]
+end;
+
+"""
+    get_density(nn, data, t, m)
+
+Generate `m` samples from the model `nn` given the data `data` up to time `t`.
+Can be used to generate the histogram at time `t` of the model.
+"""
+function get_density(nn, data, t, m)
+    Flux.reset!(nn)
+    res = []
+    for d in data[1:(t - 1)]
+        nn([d])
+    end
+    for _ in 1:m
+        nn_cp = deepcopy(nn)
+        xₜ = rand(Normal(0.0f0, 1.0f0))
+        yₜ = nn_cp([xₜ])
+        append!(res, yₜ)
+    end
+    return res
+end
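
Usage sketch (editor's illustration, not part of the patch above): the new exports `HyperParamsTS` and `ts_adaptative_block_learning` can be exercised end to end on a toy univariate series. The random-walk data, layer sizes and hyperparameter values below are illustrative assumptions that mirror the AutoRegressive experiment in examples/time_series_predictions/model.jl.

using Flux
using Distributions
using AdaptativeBlockLearning

# Toy Float32 series standing in for the AR(3) process used in model.jl (assumption).
series = Float32.(cumsum(rand(Normal(0.0, 1.0), 2000)))
xtrain, ytrain = series[1:(end - 1)], series[2:end]   # Xₜ and Xₜ₊₁

# One large batch per loader; only window_size observations of it are used per gradient step.
loaderXtrain = Flux.DataLoader(xtrain; batchsize=1000, shuffle=false, partial=false)
loaderYtrain = Flux.DataLoader(ytrain; batchsize=1000, shuffle=false, partial=false)

hparams = HyperParamsTS(; seed=1234, η=1e-3, epochs=100, window_size=200, K=5)
nn_model = Chain(RNN(1 => 16, relu), Dense(16 => 1, identity))

# Returns one loss value per processed window.
losses = ts_adaptative_block_learning(nn_model, loaderXtrain, loaderYtrain, hparams)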
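Continuing the same sketch, `get_density` (also exported above) conditions the recurrent model on the first t-1 observations and then draws m one-step-ahead samples, which can be histogrammed to inspect the predictive distribution at time t, in the spirit of the `@gif` loop in model.jl. The time index and sample count are arbitrary choices.

using Plots

t, m = 50, 1000
samples = get_density(nn_model, xtrain, t, m)   # m simulated next values after seeing xtrain[1:t-1]
histogram(Float32.(samples); bins=100, normalize=:pdf, label="t=$t")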
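Finally, a small numeric check (again an editor's illustration) of the `_sigmoid` change in src/CustomLossFunction.jl: the slope factor drops from 20 to 10.0f0, so the soft indicator of `ŷ < y` that feeds `generate_aₖ` transitions more gently around the observation `y`. The grid of outputs below is arbitrary.

using Flux: sigmoid_fast
using AdaptativeBlockLearning: _sigmoid

ŷ = reshape(collect(Float32, -1.0:0.25:1.0), 1, :)   # 1×9 matrix of simulated model outputs
y = 0.0f0                                            # observation the soft indicator is centred on
old_soft = sigmoid_fast.((y .- ŷ) .* 20)             # behaviour before this patch (slope 20)
new_soft = _sigmoid(ŷ, y)                            # patched behaviour (slope 10.0f0)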