## Intro

This is inspired by 
Article (likas2001probability) Likas, A. Probability density estimation using artificial neural networks Computer physics communications, Elsevier, 2001, 135, 167-175

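But rather than using the network output itself as the unnormalised density, we will instead work with its derivative.
This lets us replace the integral in their normalising constant with a difference of two network evaluations, at the cost of differentiating the network.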

Note that this method only works for compact supports



In their approach the PDF is given by $$p_h(x,p) = \dfrac{h(x,p)}{\int_S h(z,p) dz}$$
where, in their case, $h=N(x,p)$ is a neural network with weight and bias parameters $p$,
and $S$ is a compact support (that is, closed and bounded).


But if instead we say $h=\frac{\partial N(x,p)}{\partial x}$,

then $$p_h(x,p) = \dfrac{h(x,p)}{\int_S h(z,p) dz}=\dfrac{\frac{\partial N(x,p)}{\partial x}}{N(\max(S),p) - N(\min(S), p)}$$

The denominator is of course more complex when $S$ is not one-dimensional.


The loss function given is the negative log-likelihood of the set of training samples $X$
$$L(p) = -\sum_{x \in X} \ln(h(x,p))  + |X| \ln\left(\int_S h(z,p) dz\right)$$

Which becomes:

$$L(p) = -\sum_{x \in X} \ln\left(\frac{\partial N(x,p)}{\partial x}\right)  + |X| \ln\left(N(\max(S),p)-N(\min(S),p)\right)$$

In [1]:
using StatsBase
using Distributions

In [2]:
using TensorFlow
using MLDataUtils

In [3]:
using DensityEstimationML

In [4]:
"""
    NeuralDensityEstimator

Bundle of a TensorFlow session and the graph nodes needed to train and query
the neural density estimate.

# Fields
- `sess`: session in which all nodes are run.
- `optimizer`: op run by `fit!` on every epoch.
- `conditioner`: op run by `condition!` on every step.
- `t`: placeholder through which observations are fed.
- `pdf`: tensor evaluated by `pdf(est, ts)`.
"""
struct NeuralDensityEstimator
    sess::Session

    # Network nodes
    optimizer::Tensor
    conditioner::Tensor
    t::Tensor
    pdf::Tensor
end

In [125]:
# Evaluate the estimator's log-likelihood on two points.
# NOTE(review): the recorded error below names a node `loglikelyhood`, which is not
# the spelling used by the definitions later in this notebook — presumably this
# output is from an earlier version of the cell; re-run to confirm.
loglikelihood(est,[1,2])

LoadError: [91mNode loglikelyhood not found in graph[39m

In [177]:
"""
    pdf(est::NeuralDensityEstimator, t::Real)

Evaluate the estimated density at the single point `t`.

Wraps `t` in a one-element vector, delegates to the `AbstractVector` method and
returns the first (only) element of its result.
"""
function Distributions.pdf(est::NeuralDensityEstimator, t::Real)
    # The batch method dispatches on `AbstractVector`; a (1, 1) matrix would not
    # match it, so wrap the scalar in a vector instead of reshaping.
    return first(pdf(est, [t]))
end

"""
    pdf(est::NeuralDensityEstimator, ts::AbstractVector)

Evaluate the estimated density at every point in `ts`.

Runs the estimator's `pdf` tensor in its session with `ts'` fed to the
placeholder `est.t`, and returns the result flattened with `vec`.
"""
function Distributions.pdf(est::NeuralDensityEstimator, ts::AbstractVector)
    feed = Dict(est.t => ts')
    return vec(run(est.sess, est.pdf, feed))
end

"""
    loglikelihood(est::NeuralDensityEstimator, ts::AbstractVector)

Evaluate the graph node named `"loglikelihood"` with `ts'` fed to the
placeholder `est.t`.

The result of the session run is flattened with `vec`, so a vector is returned
rather than a bare scalar.
"""
function Distributions.loglikelihood(est::NeuralDensityEstimator, ts::AbstractVector)
    graph = est.sess.graph
    node = graph["loglikelihood"]
    feed = Dict(est.t=>ts')
    result = run(est.sess, node, feed)
    return vec(result)
end


In [178]:
"""
    fit!(estimator::NeuralDensityEstimator, observations; epochs = 20)

Train `estimator` on `observations` for `epochs` optimiser steps and return it.

Every step feeds `observations'` to the placeholder `estimator.t` and runs the
optimizer op together with the graph nodes `ysmin`, `ysmax`, `loglikelihood`
and `working_loss`. The fetched values are printed on epochs 1, 101, 201, ...

# Keywords
- `epochs`: number of optimiser steps to run.
"""
function StatsBase.fit!(estimator::NeuralDensityEstimator, observations;
    epochs = 20)

    gr = estimator.sess.graph
    # Loop-invariant: look the nodes up and build the feed once, not per epoch.
    fetches = [gr["ysmin"],gr["ysmax"],gr["loglikelihood"], gr["working_loss"],
        estimator.optimizer]
    feed = Dict(estimator.t=>observations')
    for ii in 1:epochs
        loss_o = run(estimator.sess, fetches, feed)
        ii % 100 == 1 && println("Epoch $ii: loss: $(loss_o)")
    end
    return estimator
end

In [179]:
# Build the TensorFlow graph for the density estimator and return it wrapped in a
# `NeuralDensityEstimator` with freshly initialised variables.
#
# Arguments:
#   prob_layer_sizes - sizes of the hidden layers; they are spliced between an input
#                      layer of size 1 and an output layer of size 1.
#   support          - only `minimum(support)` and `maximum(support)` are used.
#
# NOTE(review): `fit!`, `condition!` and `loglikelihood` look nodes up by name
# ("ysmin", "ysmax", "loglikelihood", "working_loss", "condition_loss"). Presumably
# `@tf` names each node after the variable it is assigned to, so do not rename the
# locals below without confirming that.
function NeuralDensityEstimator(prob_layer_sizes, support)
    sess = Session(Graph())
    @tf begin
        # Observations are fed as a single row with an unspecified number of columns.
        t = placeholder(Float32, shape=[1, -1])
        # The two ends of the support, as 1x1 constants.
        smin = constant(reshape([minimum(support)],(1,1)))
        smax = constant(reshape([maximum(support)],(1,1)))
        
        layer_sizes= [1; prob_layer_sizes; 1]
        
        # Entry k is the function mapping an input to the output of layer k;
        # entry 1 is the identity (the input itself).
        network_fun_stack = Function[Base.identity]       
       
        for ii in 2:length(layer_sizes)
            below_size = layer_sizes[ii-1]
            above_size = layer_sizes[ii]
                       
            Wii = get_variable("W_$ii", [above_size, below_size], Float32)
            # The layer uses W multiplied by itself rather than W directly —
            # presumably an elementwise square that keeps the effective weights
            # non-negative (TODO confirm against `Ops.mul`).
            Wii2  = Ops.mul(Wii, Wii; name = "W_$(ii)_squared")
            # Hidden layers: sigmoid with a bias. Final layer: exp, no bias.
            act_fun = if ii!=length(layer_sizes)
                bii = get_variable("b_$ii", [above_size, 1], Float32)
                z -> nn.sigmoid(Wii2*z .+ bii)
            else
                z-> exp(Wii2*z)
            end
            # Compose this layer on top of everything below it.
            push!(network_fun_stack, z->act_fun(network_fun_stack[ii-1](z)))
        end
        
        network = network_fun_stack[end]

        
        # Network output at the two ends of the support, and at the observations.
        ysmin = TensorFlow.identity(network(smin))
        ysmax = TensorFlow.identity(network(smax))
        yt = network(t)
        
        # Density = derivative of the network w.r.t. its input, normalised by the
        # difference of the network outputs at the ends of the support.
        denominator = (ysmax-ysmin) #area
        numerator = gradients(yt,t)
        pdf =numerator/denominator
        
        
        # Number of observations = size of the second dimension of `t`.
        n_points = TensorFlow.shape(t)[2]
        loglikelihood = reduce_sum(log(numerator)) - n_points.*log(denominator)
        
        # Training loss: negative log-likelihood plus a penalty (weight 0.1) on the
        # squared deviation of the denominator from 1.
        area_loss = (1f0.-denominator)^2
        working_loss = -1*loglikelihood + 0.1*area_loss
        
        optimizer = train.minimize(train.AdamOptimizer(), working_loss)
        
        
        # Conditioning
        # Make sure that ysmin~=1, and ysmax~=2
        condition_loss = (1f0 - ysmin)^2 + (2f0 - ysmax)^2
        condition_optimiser = train.minimize(train.AdamOptimizer(;name="adam_cond"), condition_loss)
    end
    
    run(sess, global_variables_initializer())
    
    # Positional fields: sess, optimizer, conditioner, t, pdf.
    NeuralDensityEstimator(sess, optimizer, condition_optimiser, t, pdf)
end

NeuralDensityEstimator

In [180]:
"""
    condition!(est::NeuralDensityEstimator, tol = 1e-15, max_epochs = 2_000)

"Condition" the neural density estimate so that the support extrema are mapped
to 1 and 2. This improves training by adjusting the area the network has to
learn over.

Runs the conditioning optimiser for at most `max_epochs` steps, stopping early
once the conditioning loss drops below `tol`. Progress is shown on steps
1, 51, 101, ... Returns `nothing`.

# Arguments
- `tol`: loss threshold below which conditioning stops.
- `max_epochs`: upper bound on the number of conditioning steps.
"""
function condition!(est::NeuralDensityEstimator, tol = 1e-15, max_epochs=2_000)
    gr = est.sess.graph
    fetches = [est.conditioner, gr["ysmin"],gr["ysmax"], gr["condition_loss"]]
    for ii in 1:max_epochs
        _, ysmin, ysmax, condition_loss = run(est.sess, fetches)
        ii % 50 == 1 && @show (ii, ysmin, ysmax, condition_loss)
        if condition_loss[1] < tol
            break
        end
    end
    return nothing
end




condition!

In [181]:
# Draw a sample from the `GenerateDatasets.Likas1()` dataset, show the sample's
# log-likelihood under `dataset`, and build an estimator with two hidden layers
# of 64 units over the dataset's approximate support.
dataset =GenerateDatasets.Likas1()
data = rand(dataset)
@show loglikelihood(dataset, data)
est = NeuralDensityEstimator([64,64], approximate_support(dataset))


loglikelihood(dataset, data) = -10480.494472314513


2017-09-12 18:26:36.130845: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX TITAN X, pci bus id: 0000:01:00.0)


NeuralDensityEstimator(Session(Ptr{Void} @0x00007fc39ce49a30), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [182]:
# Condition the network first, then train it on the sample for 20,000 epochs.
condition!(est)
println("Conditioning Done")
fit!(est, data; epochs=20_000)

(ii, ysmin, ysmax, condition_loss) = (1, [1.00289], [1.00289], [0.994246])
(ii, ysmin, ysmax, condition_loss) = (51, [1.16893], [1.16937], [0.718487])
(ii, ysmin, ysmax, condition_loss) = (101, [1.51794], [1.54766], [0.472872])
(ii, ysmin, ysmax, condition_loss) = (151, [1.49805], [1.61684], [0.39486])
(ii, ysmin, ysmax, condition_loss) = (201, [1.46297], [1.72065], [0.292374])
(ii, ysmin, ysmax, condition_loss) = (251, [1.42254], [1.79785], [0.21941])
(ii, ysmin, ysmax, condition_loss) = (301, [1.38931], [1.84119], [0.176787])
(ii, ysmin, ysmax, condition_loss) = (351, [1.36476], [1.86682], [0.150785])
(ii, ysmin, ysmax, condition_loss) = (401, [1.34582], [1.88392], [0.13307])
(ii, ysmin, ysmax, condition_loss) = (451, [1.33034], [1.89648], [0.119844])
(ii, ysmin, ysmax, condition_loss) = (501, [1.31708], [1.90636], [0.109307])
(ii, ysmin, ysmax, condition_loss) = (551, [1.30531], [1.91451], [0.100523])
(ii, ysmin, ysmax, condition_loss) = (601, [1.2946], [1.92146], [0.0929577])
(ii, 

Epoch 6701: loss: Any[[1.00794], [5.83183], [-10777.2], [10778.7], nothing]
Epoch 6801: loss: Any[[1.00746], [5.76488], [-10774.0], [10775.4], nothing]
Epoch 6901: loss: Any[[1.00699], [5.6986], [-10771.1], [10772.4], nothing]
Epoch 7001: loss: Any[[1.00654], [5.62821], [-10768.4], [10769.7], nothing]
Epoch 7101: loss: Any[[1.00611], [5.55005], [-10765.8], [10767.1], nothing]
Epoch 7201: loss: Any[[1.0057], [5.46446], [-10763.5], [10764.7], nothing]
Epoch 7301: loss: Any[[1.00532], [5.37491], [-10761.3], [10762.5], nothing]
Epoch 7401: loss: Any[[1.00496], [5.28522], [-10759.3], [10760.4], nothing]
Epoch 7501: loss: Any[[1.00463], [5.19773], [-10757.4], [10758.4], nothing]
Epoch 7601: loss: Any[[1.00432], [5.11319], [-10755.5], [10756.5], nothing]
Epoch 7701: loss: Any[[1.00402], [5.03191], [-10753.8], [10754.7], nothing]
Epoch 7801: loss: Any[[1.00375], [4.95356], [-10752.0], [10752.9], nothing]
Epoch 7901: loss: Any[[1.0035], [4.87781], [-10750.3], [10751.2], nothing]
Epoch 8001: los

Epoch 17501: loss: Any[[1.00001], [2.25562], [-10567.6], [10567.6], nothing]
Epoch 17601: loss: Any[[1.00001], [2.24834], [-10566.9], [10566.9], nothing]
Epoch 17701: loss: Any[[1.00001], [2.24113], [-10566.2], [10566.2], nothing]
Epoch 17801: loss: Any[[1.0], [2.23417], [-10565.6], [10565.6], nothing]
Epoch 17901: loss: Any[[1.0], [2.22741], [-10564.9], [10564.9], nothing]
Epoch 18001: loss: Any[[1.0], [2.2208], [-10564.3], [10564.3], nothing]
Epoch 18101: loss: Any[[1.0], [2.21452], [-10563.7], [10563.7], nothing]
Epoch 18201: loss: Any[[1.0], [2.20809], [-10563.0], [10563.0], nothing]
Epoch 18301: loss: Any[[1.0], [2.20184], [-10562.4], [10562.4], nothing]
Epoch 18401: loss: Any[[1.0], [2.19572], [-10561.8], [10561.8], nothing]
Epoch 18501: loss: Any[[1.0], [2.1897], [-10561.2], [10561.2], nothing]
Epoch 18601: loss: Any[[1.0], [2.18392], [-10560.6], [10560.6], nothing]
Epoch 18701: loss: Any[[1.0], [2.1783], [-10560.1], [10560.1], nothing]
Epoch 18801: loss: Any[[1.0], [2.17286], [

NeuralDensityEstimator(Session(Ptr{Void} @0x00007fc39ce49a30), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [183]:
"""
    demonstration_plot(est, dataset, data = rand(dataset), args...; kwargs...)

Print the true and the estimated log-likelihood of `data`, then plot the
estimated density and a histogram of `data` in a two-row layout.

The density is evaluated on a grid with spacing 0.01 spanning the minimum and
maximum of `approximate_support(dataset)`. Extra `args` and `kwargs` are
forwarded to `plot`.
"""
function demonstration_plot(est, dataset, data=rand(dataset), args...; kwargs...)
    support = approximate_support(dataset)
    X = minimum(support) : 0.01 : maximum(support)
    println("True loglikelihood      = $(loglikelihood(dataset, data))")
    println("Estimated loglikelihood = $(loglikelihood(est, data))")
    plot([X], [pdf(est,X), data],
        # NOTE: the x-axis is fixed rather than taken from the dataset's support.
        xlims= (-10,10),
        seriestype = [:path :histogram],
        layout=(2,1),
        legend=false,
        nbins=[1  length(X)÷10],
        args...; kwargs...
    )
end



demonstration_plot (generic function with 2 methods)

In [184]:
# Print both log-likelihoods and plot the fitted density against the sample.
demonstration_plot(est, dataset, data)

True loglikelihood      = -10480.494472314513
Estimated loglikelihood = [-10553.3]


In [36]:
# Same experiment on the `GenerateDatasets.MagdonIsmailAndAtiya()` dataset, this
# time with a single hidden layer of 100 units.
dataset = GenerateDatasets.MagdonIsmailAndAtiya()
data = rand(dataset)
@show loglikelihood(dataset, data)
est = NeuralDensityEstimator([100], approximate_support(dataset))


loglikelihood(dataset, data) = -772.542882959356


2017-09-12 17:13:29.527306: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX TITAN X, pci bus id: 0000:01:00.0)


NeuralDensityEstimator(Session(Ptr{Void} @0x00007fc39c56e850), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [37]:
# Condition the network, then train on the sample for 10,000 epochs.
condition!(est)
fit!(est, data; epochs=10_000)

(ii, ysmin, ysmax, condition_loss) = (1, [1.00511], [1.00514], [0.989777])
(ii, ysmin, ysmax, condition_loss) = (51, [1.22405], [1.29118], [0.552626])
(ii, ysmin, ysmax, condition_loss) = (101, [1.25517], [1.9354], [0.069282])
(ii, ysmin, ysmax, condition_loss) = (151, [1.13798], [1.9875], [0.0191936])
(ii, ysmin, ysmax, condition_loss) = (201, [1.09839], [1.99411], [0.0097146])
(ii, ysmin, ysmax, condition_loss) = (251, [1.0776], [1.99634], [0.00603483])
(ii, ysmin, ysmax, condition_loss) = (301, [1.06446], [1.99744], [0.00416216])
(ii, ysmin, ysmax, condition_loss) = (351, [1.05532], [1.9981], [0.00306406])
(ii, ysmin, ysmax, condition_loss) = (401, [1.04855], [1.99852], [0.00235938])
(ii, ysmin, ysmax, condition_loss) = (451, [1.04332], [1.99882], [0.00187767])
(ii, ysmin, ysmax, condition_loss) = (501, [1.03914], [1.99903], [0.00153256])
(ii, ysmin, ysmax, condition_loss) = (551, [1.03571], [1.99919], [0.00127614])
(ii, ysmin, ysmax, condition_loss) = (601, [1.03286], [1.99931], [0

Epoch 6801: loss: Any[[1.0], [1.00149], [802.316], [802.416], nothing]
Epoch 6901: loss: Any[[1.0], [1.00151], [801.826], [801.925], nothing]
Epoch 7001: loss: Any[[1.0], [1.00148], [801.342], [801.442], nothing]
Epoch 7101: loss: Any[[1.0], [1.00146], [800.865], [800.965], nothing]
Epoch 7201: loss: Any[[1.0], [1.00146], [800.395], [800.494], nothing]
Epoch 7301: loss: Any[[1.0], [1.00143], [799.931], [800.03], nothing]
Epoch 7401: loss: Any[[1.0], [1.00144], [799.473], [799.573], nothing]
Epoch 7501: loss: Any[[1.0], [1.00142], [799.022], [799.122], nothing]
Epoch 7601: loss: Any[[1.0], [1.00142], [798.577], [798.677], nothing]
Epoch 7701: loss: Any[[1.0], [1.0014], [798.139], [798.238], nothing]
Epoch 7801: loss: Any[[1.0], [1.00139], [797.722], [797.822], nothing]
Epoch 7901: loss: Any[[1.0], [1.0014], [797.28], [797.38], nothing]
Epoch 8001: loss: Any[[1.0], [1.00138], [796.86], [796.959], nothing]
Epoch 8101: loss: Any[[1.0], [1.00136], [796.446], [796.545], nothing]
Epoch 8201: 

NeuralDensityEstimator(Session(Ptr{Void} @0x00007fc39c56e850), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [38]:
# Evaluate the fitted density on a 0.01-spaced grid over the approximate support.
# The whole grid is passed to the vector method of `pdf` in one call instead of
# broadcasting the scalar method over every grid point.
X=minimum(approximate_support(dataset)) : 0.01 : maximum(approximate_support(dataset))
plot(X, pdf(est,X), xlims= approximate_support(dataset), ylims=(0,1))

In [39]:
# Histogram of the sample with 50 bins, for visual comparison with the density plot.
histogram(data, nbins=50)

In [47]:
# Same experiment on 5000 draws from `Arcsine(1,4)`, with a single hidden layer
# of 100 units.
dataset = Arcsine(1,4)
data = rand(dataset, 5000)
@show loglikelihood(dataset, data)
est = NeuralDensityEstimator([100], approximate_support(dataset))


loglikelihood(dataset, data) = -4235.659430071057


2017-09-12 17:17:51.522335: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX TITAN X, pci bus id: 0000:01:00.0)


NeuralDensityEstimator(Session(Ptr{Void} @0x00007fc39c7458f0), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [48]:
# Condition the network, then train on the sample for 10,000 epochs.
condition!(est)
fit!(est, data; epochs=10_000)

(ii, ysmin, ysmax, condition_loss) = (1, [1.00407], [1.00407], [0.991899])
(ii, ysmin, ysmax, condition_loss) = (51, [1.24801], [1.24997], [0.624061])
(ii, ysmin, ysmax, condition_loss) = (101, [1.49691], [1.5167], [0.480492])
(ii, ysmin, ysmax, condition_loss) = (151, [1.50135], [1.54809], [0.455569])
(ii, ysmin, ysmax, condition_loss) = (201, [1.49981], [1.58203], [0.424506])
(ii, ysmin, ysmax, condition_loss) = (251, [1.4952], [1.61869], [0.390623])
(ii, ysmin, ysmax, condition_loss) = (301, [1.48773], [1.65483], [0.357022])
(ii, ysmin, ysmax, condition_loss) = (351, [1.47819], [1.68796], [0.326034])
(ii, ysmin, ysmax, condition_loss) = (401, [1.46758], [1.71682], [0.298816])
(ii, ysmin, ysmax, condition_loss) = (451, [1.45664], [1.74146], [0.275368])
(ii, ysmin, ysmax, condition_loss) = (501, [1.44569], [1.76275], [0.254926])
(ii, ysmin, ysmax, condition_loss) = (551, [1.43449], [1.78194], [0.236332])
(ii, ysmin, ysmax, condition_loss) = (601, [1.42233], [1.80038], [0.218214])
(ii,

Epoch 6901: loss: Any[[10.6612], [14.764], [4664.05], [4665.01], nothing]
Epoch 7001: loss: Any[[10.6494], [14.73], [4662.24], [4663.18], nothing]
Epoch 7101: loss: Any[[10.6443], [14.702], [4660.44], [4661.37], nothing]
Epoch 7201: loss: Any[[10.6453], [14.6794], [4658.66], [4659.58], nothing]
Epoch 7301: loss: Any[[10.6511], [14.6611], [4656.91], [4657.81], nothing]
Epoch 7401: loss: Any[[10.6598], [14.645], [4655.17], [4656.06], nothing]
Epoch 7501: loss: Any[[10.6718], [14.6317], [4653.46], [4654.33], nothing]
Epoch 7601: loss: Any[[10.6864], [14.6212], [4651.76], [4652.62], nothing]
Epoch 7701: loss: Any[[10.7028], [14.6126], [4650.09], [4650.94], nothing]
Epoch 7801: loss: Any[[10.7203], [14.6045], [4648.44], [4649.27], nothing]
Epoch 7901: loss: Any[[10.7389], [14.598], [4646.81], [4647.63], nothing]
Epoch 8001: loss: Any[[10.7572], [14.5911], [4645.2], [4646.0], nothing]
Epoch 8101: loss: Any[[10.7765], [14.5855], [4643.61], [4644.4], nothing]
Epoch 8201: loss: Any[[10.7955], [

NeuralDensityEstimator(Session(Ptr{Void} @0x00007fc39c7458f0), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

demo_plot (generic function with 2 methods)

In [92]:
# Histogram of the sample, with the x-axis limited to the dataset's approximate
# support and the legend hidden.
plot([data],
    xlims= approximate_support(dataset),
    seriestype = [ :histogram],
    legend=false
)

In [61]:
# Histogram of `data2`.
# NOTE(review): `data2` is not defined in any cell visible here — presumably left
# over from an earlier session; confirm before re-running.
plot(data2, seriestype=:histogram)

In [88]:
?histogram!

search: [1mh[22m[1mi[22m[1ms[22m[1mt[22m[1mo[22m[1mg[22m[1mr[22m[1ma[22m[1mm[22m[1m![22m [1mh[22m[1mi[22m[1ms[22m[1mt[22m[1mo[22m[1mg[22m[1mr[22m[1ma[22m[1mm[22m2d[1m![22m [1mh[22m[1mi[22m[1ms[22m[1mt[22m[1mo[22m[1mg[22m[1mr[22m[1ma[22m[1mm[22m [1mH[22m[1mi[22m[1ms[22m[1mt[22m[1mo[22m[1mg[22m[1mr[22m[1ma[22m[1mm[22m [1mh[22m[1mi[22m[1ms[22m[1mt[22m[1mo[22m[1mg[22m[1mr[22m[1ma[22m[1mm[22m2d



No documentation found.

`Plots.histogram!` is a `Function`.

```
# 1 method for generic function "histogram!":
histogram!(args...; kw...) in Plots at /home/uniwa/students2/students/20361362/linux/.julia/v0.6/RecipesBase/src/RecipesBase.jl:360
```
