# ORNN using the GRU layer from Martinez/PyTorch for legs

In [1]:
# using Revise
using LinearAlgebra, Random
using StatsBase, Statistics
using Distributions, MultivariateStats   # Categorical, P(P)CA
using Quaternions    # For manipulating 3D Geometry
using MeshCat        # For web visualisation / animation
using PyPlot         # Plotting
using AxUtil         # Cayley, skew matrices
using Flux, CuArrays # Optimisation
using DSP            # convolution / low-pass (MA) filter

# small utils libraries
using ProgressMeter, Formatting, ArgCheck, Dates
using BSON, NPZ

In [2]:
# Directory holding the project-local source files; every include below is resolved
# relative to it.
# NOTE(review): later cells reference `mocapio`, `mocaputil` and `model`; presumably those
# modules are defined by the files included here — confirm which file defines which.
DIR_MOCAP_MTDS = "." 

# Data loading and transformation utils
include(joinpath(DIR_MOCAP_MTDS, "io.jl"))

# MeshCat skeleton visualisation tools
include(joinpath(DIR_MOCAP_MTDS, "mocap_viz.jl"))

# Data scaling utils
include(joinpath(DIR_MOCAP_MTDS, "util.jl"))

# Models: LDS
include(joinpath(DIR_MOCAP_MTDS, "models.jl"))

# Table visualisation
include(joinpath(DIR_MOCAP_MTDS, "pretty.jl"))

In [3]:
############################################
##    CUSTOM WIDELY USED FUNCTIONS
# Zero, in place, the `grad` array of every element of the collection `P`.
function zero_grad!(P)
    foreach(P) do p
        p.grad .= 0
    end
end

# Union of the model types accepted by the two-argument `mse` / `smse` methods below.
const NoGradModels = Union{model.MyLDS_ng, model.ORNN_ng}
# Identity-keyed cache of per-dataset variances, filled lazily by `Statistics.var` below.
const _var_cache = IdDict()

# Mean of the squared entries of `Δ`, multiplied by `scale` (default: number of rows of `Δ`).
function mse(Δ::AbstractArray, scale=size(Δ, 1))
    mean_sq = mean(x -> x^2, Δ)
    return mean_sq * scale
end

"""
    mse(d::mocaputil.DataIterator, m::NoGradModels)

Evaluate `m` on every `(y, u, new_state)` batch yielded by `d` and return the dot product
of the per-batch `mse` values with `mocaputil.weights(d, as_pct=true)`.

The hidden state `m.h` is zeroed whenever a batch is flagged `new_state`, and once more
after the final batch, so the model is left with a zero state. For an `ORNN_ng` with a
non-empty `inpnn`, the input is augmented with `m.inpnn(u)` before the model is applied.
"""
function mse(d::mocaputil.DataIterator, m::NoGradModels)
    obj = map(d) do (y, u, new_state)
        new_state && (m.h .= zeros(size(m, 1)))
        # The original wrote `(m isa ORNN_ng && length(m.inpnn)) > 0`, which compared a
        # Bool with 0 for non-ORNN models and only worked by accident; the comparison now
        # applies to the length, and the short-circuit still guards the field access.
        use_inpnn = m isa model.ORNN_ng && length(m.inpnn) > 0
        u_ = use_inpnn ? vcat(u, m.inpnn(u)) : u
        mse(m(u_) - y)
    end
    m.h .= zeros(size(m, 1))
    return dot(obj, mocaputil.weights(d, as_pct=true))
end


# Convenience methods: MSE of a model on a vector of data dicts, a single dict with
# `:U` / `:Y` entries, or a tuple whose first element is the target and second the input.
function mse(Ds::Vector{D}, m::NoGradModels) where {D <: Dict}
    iter = mocaputil.DataIterator(Ds, 1000000)
    return mse(iter, m)
end

function mse(D::Dict, m::NoGradModels)
    residual = m(D[:U]) - D[:Y]
    return mse(residual)
end

function mse(V::Tuple, m::NoGradModels)
    residual = m(V[2]) - V[1]
    return mse(residual)
end

# Calculate variance
# Concatenate (column-wise) the first element of every item yielded by `d`, compute the
# variance along dims=2, store it in `cache` under the key `d`, and return it.
# The result is written to the `cache` argument; the original ignored that argument and
# always wrote to the global `_var_cache`.
function _calc_var!(cache::IdDict, d::mocaputil.DataIterator)
    Y = reduce(hcat, [y for (y, u, h) in d])
    return cache[d] = var(Y, dims=2)
end

# Concatenate (column-wise) the `:Y` entries of every dict in `d`, compute the variance
# along dims=2, store it in `cache` under the key `d`, and return it.
# The result is written to the `cache` argument; the original ignored that argument and
# always wrote to the global `_var_cache`.
function _calc_var!(cache::IdDict, d::Vector{D}) where {D <: Dict}
    Y = reduce(hcat, [dd[:Y] for dd in d])
    return cache[d] = var(Y, dims=2)
end

# Variance of a dataset, memoised in `_var_cache` (keyed by object identity): computed
# via `_calc_var!` on first request and looked up afterwards.
function Statistics.var(d::Union{mocaputil.DataIterator, Vector{D}}) where {D <: Dict}
    if !haskey(_var_cache, d)
        _calc_var!(_var_cache, d)
    end
    return _var_cache[d]
end

# Variance along dims=2 of the `:Y` entry of a single data dict (not cached).
function Statistics.var(d::Dict)
    return var(d[:Y], dims=2)
end

# Standardised MSE
# `mse` of `Δ` divided by the summed variance (along dims=2) of `Δ` itself.
function smse(Δ::AbstractArray, scale=size(Δ, 1))
    numerator = mse(Δ, scale)
    return numerator / sum(var(Δ, dims=2))
end

# Model-based variants: `mse` of the model on the data, divided by the summed variance
# of the corresponding targets.
function smse(d::mocaputil.DataIterator, m::NoGradModels)
    return mse(d, m) / sum(var(d))
end

function smse(D::Dict, m::NoGradModels)
    residual = m(D[:U]) - D[:Y]
    return mse(residual) / sum(var(D))
end

function smse(Ds::Vector{D}, m::NoGradModels) where {D <: Dict}
    iter = mocaputil.DataIterator(Ds, 1000000)
    return mse(iter, m) / sum(var(Ds))
end

function smse(D::Tuple, m::NoGradModels)
    return mse(D, m) / sum(var(D[1], dims=2))
end

# Square root of the standardised MSE; arguments are forwarded unchanged to `smse`.
function rsmse(args...)
    return sqrt(smse(args...))
end

In [4]:
# Weighted MSE of a multi-task LDS over the batches of `d`, where column `ii` of `z` is
# used to build the model (via `model.make_lds`) applied to batch `ii`.
# `m.h` is zeroed on every batch flagged as a new state and once more at the end.
function mse(d::mocaputil.DataIterator, m::model.MTLDS_ng, z::AbstractArray)
    @argcheck size(z, 2) == length(d)
    batch_errs = [begin
            if new_state
                m.h .= zeros(size(m, 1))
            end
            task_model = model.make_lds(m, z[:,ii], m.η_h)
            mse(task_model(u) - y)
        end for (ii, (y, u, new_state)) in enumerate(d)]
    m.h .= zeros(size(m, 1))
    batch_weights = mocaputil.weights(d, as_pct=true)
    return dot(batch_errs, batch_weights)
end


# Weighted MSE of a multi-task ORNN over the batches of `d`. For batch `ii` the model is
# built by `model.make_rnn_psi` from the (untracked) output of `nn` at column `ii` of `z`.
# If `m.inpnn` is non-empty, the input is augmented with `m.inpnn(u)`.
# `m.h` is zeroed on every batch flagged as a new state and once more at the end.
function mse(d::mocaputil.DataIterator, m::model.ORNN_ng, z::AbstractArray, nn::Chain)
    @argcheck size(z, 2) == length(d)
    batch_errs = [begin
            if new_state
                m.h .= zeros(size(m, 1))
            end
            ψ = Tracker.data(nn(z[:,ii]))
            task_model = model.make_rnn_psi(m, ψ, 1f0)
            inp = length(m.inpnn) > 0 ? vcat(u, m.inpnn(u)) : u
            mse(task_model(inp) - y)
        end for (ii, (y, u, new_state)) in enumerate(d)]
    m.h .= zeros(size(m, 1))
    batch_weights = mocaputil.weights(d, as_pct=true)
    return dot(batch_errs, batch_weights)
end

# Standardised variants of the multi-task `mse` methods: divide by the summed variance of
# the targets in `d`.
function smse(d::mocaputil.DataIterator, m::model.MTLDS_ng, z::AbstractArray)
    return mse(d, m, z) / sum(var(d))
end

function smse(d::mocaputil.DataIterator, m::model.ORNN_ng, z::AbstractArray, nn::Chain)
    return mse(d, m, z, nn) / sum(var(d))
end

### Load in Data
See `2_Preprocess.ipynb`

**Note that, for the files currently on disk**,
* `edin_Ys_30fps.bson` was created with `include_ftcontact=false, fps=30`,
* `edin_Xs_30fps.bson` was created with `include_ftcontact=true, include_ftmid=true, joint_pos=false, fps=fps, speed=false`.

In [5]:
# task descriptors: read the `:styles_lkp` entry of the BSON file "styles_lkp"
# (path is relative to the working directory).
styles_lkp = BSON.load("styles_lkp")[:styles_lkp];

In [6]:
# Load in data: raw inputs (`:Xs`) and raw outputs (`:Ys`) from the 30 fps BSON files.
# Both are subsequently treated as collections of per-sequence matrices with time along
# the rows (see the row slicing in the next cell).
Usraw = BSON.load("edin_Xs_30fps.bson")[:Xs];
Ysraw = BSON.load("edin_Ys_30fps.bson")[:Ys];

In [7]:
# NOTE: this cell rebinds `Ysraw` / `Usraw` from themselves, so re-running it trims the
# data again; run it exactly once after loading.
# Drop the first row of every output sequence.
Ysraw = [y[2:end,:] for y in Ysraw]
# For the inputs, all but the last 8 columns are taken from rows 2:end, whereas the last
# 8 columns are taken from rows 1:end-1, i.e. those 8 features are lagged by one row.
# NOTE(review): presumably the last 8 columns are the foot-contact / mid-foot features
# mentioned in the markdown above — confirm.
Usraw = [hcat(u[2:end,1:end-8], u[1:end-1,end-7:end]) for u in Usraw];

In [8]:
# Standardise inputs and outputs
# The scalers are fitted on all sequences stacked row-wise.
standardize_Y = fit(mocaputil.MyStandardScaler, reduce(vcat, Ysraw),  1)
standardize_U = fit(mocaputil.MyStandardScaler, reduce(vcat, Usraw),  1)

# Pair row t of U with row t+1 of Y: Y drops its first row, U drops its last row.
Ys = [mocaputil.scale_transform(standardize_Y, y[2:end, :] ) for y in Ysraw];  # (1-step ahead of u)
Us = [mocaputil.scale_transform(standardize_U, u[1:end-1,:]) for u in Usraw];  # (1-step behind y)

# Guard against leakage: on the first sequence, no input column may have an absolute
# correlation of (approximately) 1 with any output column (NaN correlations are ignored).
@assert (let c=cor(Usraw[1][1:end-1, :], Ysraw[1][2:end, :], dims=1); 
        !isapprox(maximum(abs.(c[.!isnan.(c)])), 1.0); end) "some input features perfectly correlated"

# to invert: `mocaputil.invert(standardize_Y, y)`

In [9]:
# SENSE CHECK
# check that no bugs in constructing U, Y (i.e. esp that t's align and can predict U --> Y)
# Shows the cross-correlation matrix between all (standardised) input and output columns
# as an image, with the largest absolute non-NaN coefficient in the title.
let c=cor(reduce(vcat, Us) |>f64, reduce(vcat, Ys) |> f64, dims=1)
    imshow(c, aspect="auto")
    nonan_c = c[.!isnan.(c)]
    title(format("max (abs) corrcoeff: {:.8f}", maximum(abs.(nonan_c))))
    flush(stdout)
#     display(findmax(reshape(nonan_c, size(c, 1) - 2, size(c,2))))
#     printfmtln("10th best result {:.5f}", reverse(sort(nonan_c))[10]) 
end
colorbar()

In [10]:
# Bundle the raw outputs, the standardised outputs and inputs (each transposed and
# materialised as a `Matrix`, so time runs along columns) and the style lookup.
expmtdata = mocapio.ExperimentData(Ysraw, [Matrix(y') for y in Ys], 
    [Matrix(u') for u in Us], styles_lkp);
# see ?mocapio.get_data

In [11]:
# Horizontally concatenate the first, and separately the second, element of the
# `k_ahead` items of `dataIters` that follow index `start_ix`
# (i.e. items start_ix+1 … start_ix+k_ahead). Returns the two matrices as a tuple.
function data_ahead(dataIters, start_ix, k_ahead)
    window = start_ix+1:start_ix+k_ahead
    firsts = [dataIters[j][1] for j in window]
    seconds = [dataIters[j][2] for j in window]
    return reduce(hcat, firsts), reduce(hcat, seconds)
end

# MT-ORNN (Hard-EM) experiment

#### Setup data

In [12]:
# Get training set for STL and pooled models.
# `style_ix` is passed to `mocapio.get_data` below and appears in the output file names.
style_ix = 1
# Latent state dimension; `d` and `d_state` are two names for the same value.
d = d_state = 100;

In [13]:
# Train / validation / test splits of the pooled data for `style_ix`.
# NOTE(review): the exact meaning of `:split` / `:pooled` is defined in `mocapio.get_data`
# (not visible here) — see its docstring.
trainPool, validPool, testPool = mocapio.get_data(expmtdata, style_ix, :split, :pooled);

In [14]:
# construct batch iterator
# NOTE: `batch_size` is also read as a global inside `optimise_upper!` further below.
batch_size = 64
min_size = 50
trainIter = mocaputil.DataIterator(trainPool, batch_size, min_size=min_size);
# Materialised batches, so that they can be indexed by position.
trainIters = collect(trainIter);

In [15]:
# style segment lookups
style_names = ["angry", "childlike", "depressed", "neutral", "old", "proud", "sexy", "strutting"];
# Number of batches obtained for each of styles 2..8 individually ...
# NOTE(review): the hard-coded range 2:8 and `min_size=50` presumably mirror
# `style_ix = 1` and the `min_size` variable defined above — confirm if either changes.
segment_lkp = [length(mocaputil.DataIterator(mocapio.get_data(expmtdata, i, :train, :stl, split=[0.875,0.125]),
            batch_size, min_size=50)) for i in 2:8];
# ... converted into consecutive index ranges (one vector of batch indices per style).
segment_lkp = [collect(i+1:j) for (i,j) in zip(vcat(0, cumsum(segment_lkp[1:end-1])), cumsum(segment_lkp))];

#### Base model

Model 1: Bottom half only: indices 1-3 (root speed: x,z,w), 4 (root height: y), 5-28 (legs, feet: x,y,z)

In [16]:
# Load the `.npz` archive for this `style_ix` and collect its entries "1", "2", … (string
# keys) in numeric order.
# NOTE(review): presumably these are the outputs of the GRU leg model, one array per
# sequence, used as inputs for the model in this notebook — confirm.
gru1_data = npzread(format("gru1legs/edin_Us_legs{:d}_30_fps.npz", style_ix))
new_Us = [gru1_data[string(i)] for i in 1:length(gru1_data)];

In [17]:
# Same experiment container as `expmtdata`, but with the inputs replaced by `new_Us`
# (each transposed so that time runs along columns).
expmtdata_legs = mocapio.ExperimentData(Ysraw, [Matrix(y') for y in Ys], 
    [Matrix(u') for u in new_Us], styles_lkp);


pool2 = mocapio.get_data(expmtdata_legs, style_ix, :split, :pooled)
trainPool2, validPool2, testPool2 = pool2
# Use `batch_size` (not a literal 64) so these iterators stay batch-aligned with
# `trainIter`: the latents are sized by `length(trainIter)` but indexed while iterating
# `trainIter2`, and `optimise_upper!` scales its weights by `batch_size`.
trainIter2, validIter2, testIter2 = map(x->mocaputil.DataIterator(x, batch_size, min_size=min_size), pool2)
trainIters2, validIters2, testIters2 = map(collect, (trainIter2, validIter2, testIter2));

In [18]:
# Visual comparison, over two consecutive batches starting at `_batch_num`, of the first
# 20 (+ `offset`) output channels of the original data (`_Yb`) against the same channels
# of the inputs of the second dataset (`_Ubhat`, drawn semi-transparent).
fig, axs = subplots(5,4,figsize=(10,10))
_batch_num = 55
offset = 0
_Yb, _Ub = data_ahead(trainIters, _batch_num-1, 2)
_Yb2, _Ubhat = data_ahead(trainIters2, _batch_num-1, 2)   # `_Yb2` is not used below
for i in 1:20
    axs[:][i].plot(_Yb'[:, i+offset])
    axs[:][i].plot(_Ubhat'[:, i+offset], alpha=0.4)
end

### Problem

* We **do** want the model to learn the same root/leg motion when turning various tight corners at varying speeds. This is *despite the fact that many styles contain very few corner types, and little variation in speed*. **We want generalisation over basic motion**.
* We **do not** want the model to learn the same body position and arm motions when travelling certain trajectories and speeds. But we have the same situation that *many styles contain only a few trajectory types and little variation in speed*. **We do not want generalisation over style types**.

The model cannot possibly distinguish between what we wish to generalise over and what we do not. All of these factors will be highly entangled in whatever representation it learns. The strategy of learning a pooled model for the root/legs and a bespoke (MT) model for the rest of the body works OK, so long as the root model does not extract too many features from the legs and hence leak information about style. Unfortunately, we find that either the challenging corner set pieces are jerky (low model capacity) or information leakage occurs (high model capacity).

#### MT-ORNN
Note that the MTORNN object is still not mature, so its components are manipulated directly below.

In [58]:
# Output rows handled by "model 1" (see the markdown above: root and legs).
model_1_ixs = 1:28
# Single-task training data for style 4, restricted to the model-1 output rows.
# NOTE(review): `concat=true, simplify=true` presumably yield a single Dict with `:Y` and
# `:U` matrices — confirm against `mocapio.get_data`.
train_neutral = mocapio.get_data(expmtdata, 4, :train, :stl, concat=true, simplify=true);
train_neutral[:Y] = train_neutral[:Y][model_1_ixs, :];

In [59]:
# init a:
# Block-diagonal style initialisation of the ORNN transition parameters (see Henaff et al.
# for the motivation).
# NOTE(review): the cos θ values are drawn at random here (θ = rand * π/8), not extracted
# from a fitted spectral LDS as an earlier comment stated — confirm this is intended.
lds_evs = cos.(rand(Float32, Int(d/2))*π/8)
# Interleave sqrt((1-cosθ)/(1+cosθ)) with zeros (column-major read of a 2-row matrix) and
# drop the final element; the result is placed on the first sub-diagonal below.
blkvals = vcat([sqrt((1-ct)/(1+ct)) for ct in real(lds_evs)]', 
                zeros(Float32, floor(Int, d_state/2))')[1:end-1]
a = AxUtil.Math.unmake_lt_strict(diagm(-1=>blkvals), d_state)
# Prepend `d_state` further values: 10 × atanh(0.5), 10 × atanh(0.75) and the remainder
# atanh(0.9).
# NOTE(review): presumably these parameterise per-unit scaling through a tanh inside the
# model — confirm against `model.ORNN_g`.
a = vcat(ones(Float32, 10)*atanh(0.5f0), ones(Float32, 10)*atanh(0.75f0), 
    ones(Float32, d_state-20)*atanh(0.9f0), a);

In [60]:
# Output / input dimensions taken from the neutral training set.
# NOTE(review): the next cell hard-codes 64 and 28 instead of using these — confirm they agree.
d_out, d_in = length(model_1_ixs), size(train_neutral[:U], 1)

In [61]:
# Tracked parameters of the base ORNN, with hard-coded sizes: 64 outputs, 28 inputs, and
# `d` hidden units.
C = Tracker.param(Flux.glorot_normal(64, d));
B = Tracker.param(Flux.glorot_normal(d, 28));
D = Tracker.param(Flux.glorot_normal(64, 28));
# The top-left 28×28 block of D starts as the identity, i.e. the first 28 outputs
# initially receive the input unchanged through D.
D.data[1:28, 1:28] = f32(Matrix(I, 28, 28))
b_offset = Tracker.param(zeros(Float32, d));
d_offset = Tracker.param(zeros(Float32, 64));
hstate = Tracker.param(zeros(Float32, d))
# initialise base model
# The last constructor argument is an empty `Chain()`.
# NOTE(review): presumably this is the (disabled) input network `inpnn` — confirm.
ornn_base = model.ORNN_g(param(a), B, b_offset, hstate, C, D, d_offset, tanh, Chain()); #inpnn);
ornn_base_ng = model.make_nograd(ornn_base);

In [62]:
# Working copy of the base model used by the optimisation below, plus its no-grad form.
# NOTE(review): whether `copy` duplicates the parameter arrays depends on the `copy`
# method defined for `model.ORNN_g` (not visible here) — confirm.
ornn_optim = copy(ornn_base)
ornn_optim_ng = model.make_nograd(ornn_optim);

#### Multi-task manifold

In [64]:
# Sizes of the network that maps a latent code to model parameters (built in a later cell).
k = 3                 # dimension of manifold
d_nn = 200            # "complexity" of manifold (width of the hidden layer)
d_subspace = 30;      # dim of subspace (⊆ parameter space) containing the manifold

In [53]:
# Compare candidate maps from an unconstrained value x ∈ [-5, 1] to a positive std:
# exp(x), σ(3x) and σ(5x), on a log y-axis.
semilogy(-5:0.1:1, exp.(-5:0.1:1))
semilogy(-5:0.1:1, σ.(3 .* (-5:0.1:1)))
semilogy(-5:0.1:1, σ.(5 .* (-5:0.1:1)))
# Horizontal reference at std = 0.01 ...
gca().axhline(0.01, linestyle=":", color="grey")

# ... and vertical references at the x for which each map equals 0.01.
gca().axvline(log(0.01), linestyle=":", color="grey")
gca().axvline(-log(inv(0.01) -1)/3, linestyle=":", color="grey")
gca().axvline(-log(inv(0.01) -1)/5, linestyle=":", color="grey")
title("Choice of std parameterisation."); gcf().set_size_inches(5,3)

In [71]:
# Total number of scalar parameters of the ORNN, excluding the input network.
d_par = sum([length(p) for p in model.pars_no_inpnn(ornn_optim)])
# Network mapping a k-dim latent code to a d_par-dim parameter vector: one tanh hidden
# layer, a linear projection onto the d_subspace-dim subspace, and a linear read-out whose
# initial weights are Glorot-uniform scaled by 0.05.
nn = Chain(
    Dense(k, d_nn, tanh),
    Dense(d_nn, d_subspace, identity),
    Dense(d_subspace, d_par, identity,
          initW = (dims...) -> Flux.glorot_uniform(dims...) * 0.05f0)
)
# Untracked view of the same network.
nn_ng = mapleaves(Tracker.data, nn)
# Variational parameters, one column per training batch: means and pre-sigmoid std.
Zmu = Flux.param(0.01f0 * randn(Float32, k, length(trainIter)));
Zlogit_s = Flux.param(fill(-1.76f0, k, length(trainIter)));  # ≈ 0.005 std isotropic posterior.

#### Optimisation

In [98]:
"""
    optimise_upper!(ornn_optim, nn, Zmu, Zlogit_s, pars, trainIter, opt,
                    η, n_epochs, shuffle_examples, lower_ixs) -> history

Run `n_epochs` passes over `trainIter`, optimising the parameters in `pars` with `opt`
(whose learning rate `opt.eta` is set to `η`).

For each batch `ii`, a latent code is sampled from a diagonal Gaussian with mean
`Zmu[:, ii]` and std `σ.(3 * Zlogit_s[:, ii])`, mapped through `nn` to model parameters,
and used to build a per-batch RNN. The objective is a weighted, scaled squared
reconstruction error plus the KL divergence of that Gaussian from a standard normal.
Rows `1:lower_ixs[end]` of the prediction receive the input `Ub` directly (pure residual
connection); the remaining rows use the corresponding rows of the model's `D`.

Returns `history`, a `(n_epochs * nB) × 2` matrix (`nB = length(trainIter)`); row
`(epoch - 1) * nB + ii` holds `[objective - KL, objective]` for batch `ii` of that epoch.
The original function ended on the `for` loop and so returned `nothing`.

Reads the global `batch_size` to scale the batch weights.
"""
function optimise_upper!(ornn_optim, nn, Zmu, Zlogit_s, pars, trainIter, opt, 
        η, n_epochs, shuffle_examples, lower_ixs)
    opt.eta = η
    nB = length(trainIter)
    k_lat = size(Zmu, 1)   # latent dimension (previously read from the global `k`)
    W = mocaputil.weights(trainIter; as_pct=false) ./ batch_size
    history = fill(NaN, n_epochs*nB, 2)

    for ee in 1:n_epochs
        rnn = RNN(size(ornn_optim,3), size(ornn_optim,1), ornn_optim.σ)

        if shuffle_examples
            mtl_ixs, trainData = mocaputil.indexed_shuffle(trainIter)
        else
            mtl_ixs, trainData = 1:length(trainIter), trainIter
        end
        for (ii, (Yb, Ub, h0)) in zip(mtl_ixs, trainData)
            h0 && Flux.reset!(rnn)
            Tb = size(Yb, 2)      # not constant
            cmu, cstd = Zmu[:, ii], σ.(3*Zlogit_s[:, ii])
            Zs_post = cmu .+ (randn(Float32, k_lat) .* cstd)   # reparameterised sample
            
            # Log reconstruction term
            c_ornn = model.make_rnn_psi(ornn_optim, nn(Zs_post), 1f0)
            model.build_rnn!(rnn, c_ornn)
            x̂ = reduce(hcat, [rnn(Ub[:,i]) for i in 1:Tb])  |> Tracker.collect
            #         ŷ = let m=ornn_optim; m.C*x̂ + m.D*Ub .+ m.d; end   # keep same C, D, d ∀ tasks
            DU = vcat(Ub, c_ornn.D[lower_ixs[end]+1:end,:]*Ub)       # strictly residual connection to lower
            ŷ = let m=c_ornn; m.C*x̂ + DU .+ m.d; end                 # adapt C, D, d too.
            obj = mean(x->x^2, Yb - ŷ) * 8^2 * W[ii]

            # KL term
            KLD = -0.5f0 * sum(1 .+ 2 * log.(cstd) .- cmu.^2 .- cstd.^2)
            obj += KLD
            
            Tracker.back!(obj)
            history[(ee-1)*nB + ii, :] = [obj.data - KLD.data, obj.data]

            # Parameters are only updated when the batch index is a multiple of 34.
            # NOTE(review): with shuffling, `ii` is the original batch index, so updates
            # occur at irregular positions within an epoch — confirm this is intended.
            if ii % 34 == 0
                # NOTE(review): this L1 penalty is added to `obj` AFTER `Tracker.back!`
                # has run and after `history` was recorded, so as written it affects
                # neither the gradients nor the logged values. Left unchanged to preserve
                # the training behaviour; either back-propagate it or remove it.
                for layer in nn.layers
                    obj += 1e-3*sum(abs, layer.W)
                    obj += 1e-3*sum(abs, layer.b)
                end

                for p in pars
                    Tracker.update!(opt, p, Tracker.grad(p))
                end
            end

            rnn.cell.h.data .= 0       # initial state is a param :/. Easier to reset here.
            Flux.truncate!(rnn);
        end
        printfmtln("{:02d}: {:.5f} ({:.5f})", ee, sqrt.(mean(history[(1:nB) .+ nB*(ee-1), :], dims=1))[:]...); 
        flush(stdout)
    end
    return history
end

In [92]:
# ADAM optimiser; its learning rate is overwritten on every call to `optimise_upper!`.
opt = ADAM(1e-4)
# First stage: optimise the network and the latent means only (std parameters excluded).
pars = Flux.params(nn, Zmu) #, Zlogit_s);

In [99]:
# \approx hard EM initialisation
# (`Zlogit_s` is not in `pars` at this stage, so the posterior std stays at its initial value.)
history1 = optimise_upper!(ornn_optim, nn, Zmu, Zlogit_s, pars, trainIter2, opt, 1e-3, 200, true, 1:28)

In [None]:
# do "global optimisation" of latent zs per 64-length seq here

In [132]:
# reset any gradient accrued on previous step while not optimised
Zlogit_s.grad .= 0

#= rescale latent space so optimisation doesn't have to waste effort on this.
   (Note that can take many hundreds of iterations to spread out into N(0,I),
    and will cause damage to reconstruction while sigmas overlap each other
    in order to get low hanging fruit of massive sigma penalty in KL.) =#
orig_std = std(Tracker.data(Zmu), dims=2)
# Rescale the stored values in place. `Zmu = Zmu ./ orig_std` would rebind `Zmu` to the
# tracked *result* of a broadcast (a non-leaf node) rather than a parameter, so subsequent
# back-propagation / updates would no longer act on a proper leaf parameter.
Zmu.data ./= orig_std;
# Compensate in the first layer so that nn(Zmu) is unchanged (`nn_ng` is built with
# `mapleaves(Tracker.data, nn)`, so this modifies the weights used by `nn` as well).
nn_ng.layers[1].W .*= orig_std';

# include Zlogit_s now in optimisation
pars = Flux.params(nn, Zmu, Zlogit_s);

In [None]:
# Second-stage training runs (latent std now included in `pars`), with a decreasing
# learning rate. NOTE: `history2` is assigned twice; the later run overwrites the earlier.
history2 = optimise_upper!(ornn_optim, nn, Zmu, Zlogit_s, pars, trainIter2, opt, 1e-3, 200, true, 1:28)

In [149]:
history2 = optimise_upper!(ornn_optim, nn, Zmu, Zlogit_s, pars, trainIter2, opt, 1e-3, 100, true, 1:28)

In [151]:
history3 = optimise_upper!(ornn_optim, nn, Zmu, Zlogit_s, pars, trainIter2, opt, 7e-4, 50, true, 1:28)

In [152]:
history4 = optimise_upper!(ornn_optim, nn, Zmu, Zlogit_s, pars, trainIter2, opt, 5e-4, 50, true, 1:28)

In [None]:
# NOTE(review): the calls in these two cells use an 8-argument form
# `(ornn_optim, Zmap, trainIter, opt, η, n_epochs, shuffle, lower_ixs)` that does not match
# the 11-argument `optimise_upper!` defined in this notebook, and `Zmap` is not defined in
# any visible cell. They look like leftovers from an earlier version — confirm and remove
# or port to the current signature.
history2 = optimise_upper!(ornn_optim, Zmap, trainIter2, opt, 7e-4, 100, true, 1:28)
history3 = optimise_upper!(ornn_optim, Zmap, trainIter2, opt, 5e-4, 100, true, 1:28)
history4 = optimise_upper!(ornn_optim, Zmap, trainIter2, opt, 2e-4, 100, true, 1:28)

In [151]:
history3 = optimise_upper!(ornn_optim, Zmap, trainIter2, opt, 5e-4, 50, true, 1:28)
history4 = optimise_upper!(ornn_optim, Zmap, trainIter2, opt, 2e-4, 50, true, 1:28)

#### Global optimisation of latents

In [101]:
# Number of candidate latent codes to evaluate.
nsmp = 300
# Candidate latents (k × nsmp): quasi-random Gaussian draws coloured to have the empirical
# covariance Σ of the current latent means. With Σ = L*L' (L = cholesky(Σ).L), L*z has
# covariance Σ for z ~ N(0, I); the previously used upper factor gives U*U' ≠ Σ in general.
_Zsmp = cholesky(cov(Zmu.data')).L * f32(AxUtil.Random.sobol_gaussian(nsmp, k)');

In [102]:
# populate error matrix with above samples
# res[n, i] = mean squared error on batch n when the model is built from candidate latent i.
# NOTE(review): `res` is sized by `length(trainIter)` but filled while iterating
# `trainIter2`; this assumes both iterators yield the same number of batches.
res = ones(Float32, length(trainIter), nsmp)

ornn_optim_ng = model.make_nograd(ornn_optim);
rnn_ng = mapleaves(Tracker.data, RNN(d_in, d_state, ornn_optim.σ))
@time for i in 1:nsmp
    _ψ = nn_ng(_Zsmp[:,i]);
    c_ornn = model.make_rnn_psi(ornn_optim_ng, _ψ, 1f0)
    model.build_rnn!(rnn_ng, c_ornn)
    for (n, (Yb, Ub, h0)) in enumerate(trainIter2)
        h0 && Flux.reset!(rnn_ng)
        Tb = size(Yb, 2)
        # (the comprehension's `i` is local to it and does not clobber the sample index)
        x̂ = reduce(hcat, [rnn_ng(Ub[:,i]) for i in 1:Tb])
        # NOTE(review): this read-out uses the full `m.D*Ub`, whereas `optimise_upper!`
        # passes `Ub` straight through for the lower rows and only uses the remaining
        # rows of D — confirm the two are meant to differ.
        ŷ = let m=c_ornn; m.C*x̂ + m.D*Ub .+ m.d; end
        res[n,i] = mean(x->x^2, Yb - ŷ)
    end
end

In [104]:
# Implicit posterior over the candidate latents for each batch (softmax of the scaled
# negative errors); the most probable candidate is then selected per batch (an argmax,
# not a random draw).
pz = softmax(-32*(res')) 
z_smpopt = copy(Zmu.data)
foreach(1:length(trainIter2)) do n
    best = argmax(view(pz, :, n))
    z_smpopt[:, n] = _Zsmp[:, best]
end

In [111]:
# plot to compare with current position
# Scatter of latent dimensions 2 vs 3 of the selected candidates, one colour per style
# segment; a little Gaussian jitter is added, presumably to separate coincident points.
ax = gca()
# ax.scatter(_Zsmp[:,1], _Zsmp[:,2], alpha=0.1)
for i in 1:7
    ixs = segment_lkp[i]
    z = z_smpopt[:, ixs] .+ randn(Float32, k, length(ixs))*0.005
    ax.scatter(z[2,:], z[3,:], color=ColorMap("tab10")(i-1), alpha=0.5)
end
# Segments 1..7 correspond to style names 2..8.
legend(style_names[(1:7) .+ 1])

In [153]:
# plot to compare with current position
# Scatter of latent dimensions 2 vs 3 of the current latent means, one colour per style segment.
ax = gca()
for i in 1:7
    ixs = segment_lkp[i]
    ax.scatter(Zmu.data[2, ixs], Zmu.data[3,ixs], color=ColorMap("tab10")(i-1), alpha=0.5)
end
# Segments 1..7 correspond to style names 2..8.
legend(style_names[(1:7) .+ 1])

In [113]:
# update latents
# Overwrite the latent means in place with the selected candidates plus small Gaussian
# jitter. Destructive: uncomment the safeguard to avoid running this by accident.
# error("safeguard")
Zmu.data .= z_smpopt .+ randn(Float32, k, length(trainIter))*0.005;

In [154]:
# Save the no-grad model, the untracked network and the variational parameters to a BSON
# file named after the style index, state dimension, latent dimension and today's day/month.
# NOTE: `Zls` stores σ.(3 * Zlogit_s), i.e. the posterior std itself.
# error("safeguard")
fname = format("ornn{:d}_2L_{:d}_{:d}_pool_{:02d}{:02d}_var.bson", style_ix, d_state, 
    k, day(today()), month(today()))
BSON.bson(fname, m2=ornn_optim_ng, nn=nn_ng, Zmu=Zmu.data, Zls=σ.(3*Zlogit_s.data));
println(fname)

In [None]:
# ornn_base, ornn_optim_ng, nn_ng, Zmap = let b=BSON.load("ornn_2L_100_2_pool_1806_v64.bson"); 
#     b[:m1], b[:m2], b[:nn], b[:Zmap]; end
# ornn_base = model.make_grad(ornn_base)
# ornn_optim = model.make_grad(ornn_optim_ng)
# ornn_optim_ng = model.make_nograd(ornn_optim)
# Zmap = Flux.param(Zmap);
# nn = mapleaves(Tracker.param, nn_ng)
# nn_ng = mapleaves(Tracker.data, nn)

#### Plot latent space

In [148]:
# Compare the true outputs of three consecutive batches (starting at `dset_i`) with the
# predictions of models built from `n_draws` latent codes: the first is the code of batch
# `dset_i`, the others are codes of randomly chosen batches.
# NOTE(review): `Zmap` is not defined in any visible cell (the latents are called `Zmu`
# elsewhere in this notebook) — presumably a leftover name; confirm before running.
dset_i = 50
n_draws = 3

_Yb = reduce(hcat, [trainIters[dset_i+i][1] for i in 0:2])  #[:,5:end]
_U2 = reduce(hcat, [trainIters2[dset_i+i][2] for i in 0:2]) #[:,1:end-4]
_Tb = size(_Yb, 2)
# _eps = cholesky(cov(Zmap.data')).U * randn(Float32, 2, n_draws)
_eps = Tracker.data(Zmap)[:, rand(1:length(trainIter), n_draws)]
_eps[:,1] = Tracker.data(Zmap)[:,dset_i]

# One predicted output matrix per latent draw.
cldsY = map(1:n_draws) do i
    _ψ = nn_ng(_eps[:,i]);  
    c_ornn = model.make_rnn_psi(ornn_optim_ng, _ψ, 1f0)
    c_ornn(_U2)
#     c_ornn = model.make_rnn_psi(ornn_optim_ng, _ψ, 1f0)
#     model.build_rnn!(rnn_ng, c_ornn)
#     x̂ = reduce(hcat, [rnn_ng(_Ub[:,i]) for i in 1:_Tb])
#     let m=c_ornn; m.C*x̂ + m.D*_Ub .+ m.d; end
end

# Plot output channels offset+1 … offset+20: truth first, then each draw semi-transparent.
fig, axs = subplots(5,4,figsize=(10,10))
offset = 20
for i = 1:20
    axs[:][i].plot(_Yb'[:, i+offset])
    for j in 1:n_draws
        axs[:][i].plot(cldsY[j]'[:, i+offset], alpha=0.4)
    end
end

#### Visualise fit (and MT variability) for a batch

In [135]:
# Compare the true outputs of batch `dset_i` with the predictions of models built from
# `n_draws` latent codes: the first is the code of batch `dset_i`, the others are codes of
# randomly chosen batches.
# NOTE(review): `Zmap` is not defined in any visible cell (the latents are called `Zmu`
# elsewhere in this notebook) — presumably a leftover name; confirm before running.
dset_i = 30
n_draws = 3

trainIters = collect(trainIter);
_Yb, _Ub, _h = trainIters[dset_i]
_Tb = size(_Yb, 2)
# NOTE(review): the next assignment is immediately overwritten by the one after it, and
# it hard-codes a latent dimension of 2 (k = 3 elsewhere in this notebook).
_eps = cholesky(cov(Zmap.data')).U * randn(Float32, 2, n_draws)
_eps = Zmap.data[:, rand(Categorical(ones(length(trainIter))/length(trainIter)), n_draws)]
_eps[:,1] = Zmap.data[:,dset_i]
# One predicted output matrix per latent draw, using the untracked RNN `rnn_ng`.
cldsY = map(1:n_draws) do i
    _ψ = nn_ng(_eps[:,i]);    
    c_ornn = model.make_rnn_psi(ornn_optim_ng, _ψ, 1f0)
    model.build_rnn!(rnn_ng, c_ornn)
    x̂ = reduce(hcat, [rnn_ng(_Ub[:,i]) for i in 1:_Tb])
    let m=c_ornn; m.C*x̂ + m.D*_Ub .+ m.d; end
end

# Plot output channels offset+1 … offset+20: truth first, then each draw semi-transparent.
fig, axs = subplots(5,4,figsize=(10,10))
offset = 40
for i = 1:20
    axs[:][i].plot(_Yb'[:, i+offset])
    for j in 1:n_draws
        axs[:][i].plot(cldsY[j]'[:, i+offset], alpha=0.4)
    end
end

In [77]:
# Plot the square root of a moving average (rectangular window of one epoch, `nB` batches)
# of the training history.
# NOTE(review): this expects `history1` to be something `DSP.conv` can convolve with a
# vector window; `optimise_upper!` as defined in this notebook builds a two-column history
# matrix, so a single column may need to be selected — confirm. `Windows` is presumably
# `DSP.Windows`.
nB = length(trainIter)
plot(sqrt.(DSP.conv(history1, Windows.rect(nB))[nB:end-nB+1]/nB))