## Intro

This is inspired by the article (likas2001probability):
Likas, A. "Probability density estimation using artificial neural networks", Computer Physics Communications, Elsevier, 2001, 135, 167-175.

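But rather than using the network's output itself as the (unnormalised) density, we will instead use its derivative.
This lets us replace their integration over the support with a derivative: the normalising integral becomes a difference of network outputs at the ends of the support.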

Note that this method only works for compact supports.



In their method the PDF is given by $$p_h(x,p) = \dfrac{h(x,p)}{\int_S h(z,p) dz}$$
and in their case $h=N(x,p)$ is a neural network with weight and bias parameters $p$,
where $S$ is a compact support. (That means closed and bounded.)


But if instead we say $h=\frac{\partial N(x,p)}{\partial x}$,

then $$p_h(x,p) = \dfrac{h(x,p)}{\int_S h(z,p)\,dz}=\dfrac{\frac{\partial N(x,p)}{\partial x}}{N(\max(S),p) - N(\min(S), p)}$$

The denominator is of course more complex when $S$ is not one-dimensional.


The loss function given is the negative log-likelihood of the set of training samples $X$
$$L(p) = -\sum_{\forall x \in X} \ln(h(x,p))  + |X| \ln\left(\int_S h(z,p)\, dz\right)$$

Which becomes:

$$L(p) = -\sum_{\forall x \in X} \ln\left(\frac{\partial N(x,p)}{\partial x}\right)  + |X| \ln\left(N(\max(S),p)-N(\min(S),p)\right)$$

In [1]:
using StatsBase
using Distributions

In [2]:
using TensorFlow
using MLDataUtils

In [3]:
using DensityEstimationML

In [4]:
"""
    NeuralDensityEstimator

Bundle of a TensorFlow session and the graph nodes needed to train and query
a neural-network density estimate.
"""
struct NeuralDensityEstimator
    # Session the nodes below are run in
    sess::Session

    # Network nodes
    optimizer::Tensor    # run by `fit!` to take a training step
    conditioner::Tensor  # run by `condition!` to take a conditioning step
    t::Tensor            # input node that observations / query points are fed to
    pdf::Tensor          # node evaluated by `pdf` to obtain the density at `t`
end

In [5]:
"""
    pdf(est::NeuralDensityEstimator, t::Real)

Evaluate the estimated density at the single point `t`.

`t` is fed to the estimator's input node as a 1×1 array, and the first element of
the evaluated `pdf` node is returned.
"""
function Distributions.pdf(est::NeuralDensityEstimator, t::Real)
    ts = reshape([t], (1, 1))
    return run(est.sess, est.pdf, Dict(est.t => ts))[1]
end

In [6]:
"""
    fit!(estimator::NeuralDensityEstimator, observations; epochs = 20)

Run `epochs` optimizer steps with `observations'` fed to the estimator's input
node, then return `estimator`.

On every step whose index satisfies `ii % 100 == 1`, the fetched values
(`ysmin`, `ysmax`, `true_loss`, `working_loss` and the optimizer's result) are printed.
"""
function StatsBase.fit!(estimator::NeuralDensityEstimator, observations;
                        epochs = 20)
    graph = estimator.sess.graph
    for ii in 1:epochs
        # Diagnostics are fetched in the same `run` call as the optimizer,
        # so each iteration is exactly one training step.
        fetches = [graph["ysmin"], graph["ysmax"], graph["true_loss"],
                   graph["working_loss"], estimator.optimizer]
        feed = Dict(estimator.t => observations')
        loss_o = run(estimator.sess, fetches, feed)
        if ii % 100 == 1
            println("Epoch $ii: loss: $(loss_o)")
        end
    end
    return estimator
end

In [9]:
# Build the TensorFlow graph for a 1-D density estimator on the interval
# [support_min, support_max], initialise its variables, and return it wrapped
# in a NeuralDensityEstimator.
#
# `prob_layer_sizes` lists the hidden layer widths; a width-1 input layer and a
# width-1 output layer are added around them.
#
# NOTE(review): `fit!` and `condition!` look nodes up by name (gr["ysmin"],
# gr["ysmax"], gr["true_loss"], gr["working_loss"], gr["condition_loss"]), so
# presumably `@tf` names each node after the local variable it is assigned to.
# Do not rename the locals inside the `@tf` block without updating those lookups.
function NeuralDensityEstimator(prob_layer_sizes, support_min, support_max)
    sess = Session(Graph())
    @tf begin
        # Input node: one row, any number of columns (one column per point).
        t = placeholder(Float32, shape=[1, -1])
        # Support bounds as 1x1 constants so they can be passed through the network like `t`.
        smin = constant(reshape([support_min],(1,1)))
        smax = constant(reshape([support_max],(1,1)))
        
        layer_sizes= [1; prob_layer_sizes; 1]
        
        # network_fun_stack[k] maps the input to the output of layer k;
        # entry 1 is the input itself.
        network_fun_stack = Function[Base.identity]       
       
        for ii in 2:length(layer_sizes)
            below_size = layer_sizes[ii-1]
            above_size = layer_sizes[ii]
                       
            Wii = get_variable("W_$ii", [above_size, below_size], Float32)
            # The layer uses Wii multiplied by itself rather than Wii directly.
            # Presumably this is an elementwise square that keeps the effective
            # weights non-negative, so that (with increasing activations) the
            # network is non-decreasing in its input and its derivative can serve
            # as an unnormalised density — TODO confirm.
            Wii2  = Ops.mul(Wii, Wii; name = "W_$(ii)_squared")
            # Hidden layers: sigmoid with a bias. Final layer: exp, no bias.
            act_fun = if ii!=length(layer_sizes)
                bii = get_variable("b_$ii", [above_size, 1], Float32)
                z -> nn.sigmoid(Wii2*z .+ bii)
            else
                z-> exp(Wii2*z)
            end
            # Compose this layer on top of everything below it.
            push!(network_fun_stack, z->act_fun(network_fun_stack[ii-1](z)))
        end
        
        # The full network is the composition up to the last layer.
        network = network_fun_stack[end]
        
        #zsmin = network(smin)
        #zsmax = network(smax)
        
        #ysmin = min(zsmin,zsmax)
        #ysmax = max(zsmin,zsmax)
        #yt = select(zsmin[end]<zsmax[end], network(t), network(-1.*t))
        
        # Network outputs at the two ends of the support, and at the input points.
        ysmin = TensorFlow.identity(network(smin))
        ysmax = TensorFlow.identity(network(smax))
        yt = network(t)
        
        # pdf = (dN/dt) / (N(smax) - N(smin)), i.e. the formula from the intro.
        denominator = (ysmax-ysmin) #area
        numerator = gradients(yt,t)
        pdf =numerator/denominator
        
        
        # Negative log-likelihood: -sum(log(numerator)) + n_points*log(denominator),
        # where n_points is the number of columns fed to `t`.
        n_points = TensorFlow.shape(t)[2]
        true_loss= -reduce_sum(log(numerator))+ n_points.*log(denominator)
        
        # Extra penalty pulling the denominator towards 1; the optimizer minimises
        # true_loss plus 0.1 times this penalty.
        area_loss = (1f0.-denominator)^2
        working_loss = TensorFlow.identity(true_loss) + 0.1*area_loss
        
        optimizer = train.minimize(train.AdamOptimizer(), working_loss)
        
        
        # Conditioning
        # Make sure that ysmin~=1, and ysmax~=2
        # (a separate Adam optimizer, used by `condition!` before training)
        condition_loss = (1f0 - ysmin)^2 + (2f0 - ysmax)^2
        condition_optimiser = train.minimize(train.AdamOptimizer(;name="adam_cond"), condition_loss)
    end
    
    run(sess, global_variables_initializer())
    
    # Field order: sess, optimizer, conditioner, t, pdf.
    NeuralDensityEstimator(sess, optimizer, condition_optimiser, t, pdf)
end

NeuralDensityEstimator

In [11]:
"""
    condition!(est::NeuralDensityEstimator, tol = 1e-15, max_epochs = 2_000)

"Condition" the neural density estimate so that the network outputs at the support
extrema are driven towards 1 and 2.
This improves training by adjusting the area the network has to learn over.

Runs at most `max_epochs` steps of the conditioning optimizer and stops early as
soon as the conditioning loss drops below `tol`. Progress is shown on every step
whose index satisfies `ii % 50 == 1`. Returns `nothing`.
"""
function condition!(est::NeuralDensityEstimator, tol = 1e-15, max_epochs = 2_000)
    gr = est.sess.graph
    for ii in 1:max_epochs
        # One conditioning step; the diagnostics are fetched in the same run.
        _, ysmin, ysmax, condition_loss = run(est.sess, [est.conditioner, gr["ysmin"], gr["ysmax"], gr["condition_loss"]])
        ii % 50 == 1 && @show (ii, ysmin, ysmax, condition_loss)
        if condition_loss[1] < tol
            break
        end
    end
    return nothing
end


condition!

In [14]:
# First experiment: obtain data by calling the likas_1 dataset generator and build
# an estimator with two hidden layers of 10 units over the dataset's support.
dataset = GenerateDatasets.likas_1
data = dataset()
est = NeuralDensityEstimator([10,10], support(dataset)...)


2017-09-08 18:31:41.056072: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX TITAN X, pci bus id: 0000:01:00.0)


NeuralDensityEstimator(Session(Ptr{Void} @0x00007f4998ab07b0), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [15]:
# Condition the network first (outputs at the support ends towards 1 and 2),
# then train on the data.
condition!(est)
println("Conditioning Done")
fit!(est, data; epochs=20_000)

(ii, ysmin, ysmax, condition_loss) = (1, [1.00027], [1.00027], [0.999458])
(ii, ysmin, ysmax, condition_loss) = (51, [1.02225], [1.02225], [0.956487])
(ii, ysmin, ysmax, condition_loss) = (101, [1.11846], [1.12004], [0.788366])
(ii, ysmin, ysmax, condition_loss) = (151, [1.33488], [1.37219], [0.506289])
(ii, ysmin, ysmax, condition_loss) = (201, [1.48697], [1.66436], [0.349793])
(ii, ysmin, ysmax, condition_loss) = (251, [1.45201], [1.74686], [0.268391])
(ii, ysmin, ysmax, condition_loss) = (301, [1.42323], [1.79879], [0.219613])
(ii, ysmin, ysmax, condition_loss) = (351, [1.40022], [1.82891], [0.189446])
(ii, ysmin, ysmax, condition_loss) = (401, [1.38188], [1.84947], [0.168495])
(ii, ysmin, ysmax, condition_loss) = (451, [1.36648], [1.86496], [0.152543])
(ii, ysmin, ysmax, condition_loss) = (501, [1.35297], [1.87739], [0.139623])
(ii, ysmin, ysmax, condition_loss) = (551, [1.34078], [1.8878], [0.128722])
(ii, ysmin, ysmax, condition_loss) = (601, [1.32956], [1.89674], [0.119271])
(ii

Epoch 6801: loss: Any[[1.19492], [6.50007], [13134.0], [13135.8], nothing]
Epoch 6901: loss: Any[[1.19259], [6.48462], [13131.5], [13133.4], nothing]
Epoch 7001: loss: Any[[1.18905], [6.4726], [13129.2], [13131.0], nothing]
Epoch 7101: loss: Any[[1.18426], [6.46235], [13126.9], [13128.7], nothing]
Epoch 7201: loss: Any[[1.17825], [6.4523], [13124.7], [13126.6], nothing]
Epoch 7301: loss: Any[[1.17107], [6.44105], [13122.7], [13124.5], nothing]
Epoch 7401: loss: Any[[1.16285], [6.42733], [13120.6], [13122.4], nothing]
Epoch 7501: loss: Any[[1.15371], [6.41009], [13118.6], [13120.5], nothing]
Epoch 7601: loss: Any[[1.14385], [6.3885], [13116.7], [13118.5], nothing]
Epoch 7701: loss: Any[[1.13349], [6.36195], [13114.8], [13116.5], nothing]
Epoch 7801: loss: Any[[1.12283], [6.32992], [13112.8], [13114.6], nothing]
Epoch 7901: loss: Any[[1.11211], [6.29202], [13110.9], [13112.7], nothing]
Epoch 8001: loss: Any[[1.10152], [6.24806], [13109.0], [13110.8], nothing]
Epoch 8101: loss: Any[[1.091

Epoch 17701: loss: Any[[1.0], [1.26199], [13012.7], [13012.7], nothing]
Epoch 17801: loss: Any[[1.0], [1.25701], [13012.4], [13012.5], nothing]
Epoch 17901: loss: Any[[1.0], [1.25222], [13012.2], [13012.2], nothing]
Epoch 18001: loss: Any[[1.0], [1.24755], [13011.9], [13012.0], nothing]
Epoch 18101: loss: Any[[1.0], [1.24309], [13011.7], [13011.7], nothing]
Epoch 18201: loss: Any[[1.0], [1.23878], [13011.4], [13011.5], nothing]
Epoch 18301: loss: Any[[1.0], [1.2346], [13011.2], [13011.3], nothing]
Epoch 18401: loss: Any[[1.0], [1.23052], [13011.0], [13011.0], nothing]
Epoch 18501: loss: Any[[1.0], [1.22661], [13010.7], [13010.8], nothing]
Epoch 18601: loss: Any[[1.0], [1.22285], [13010.5], [13010.6], nothing]
Epoch 18701: loss: Any[[1.0], [1.21919], [13010.3], [13010.3], nothing]
Epoch 18801: loss: Any[[1.0], [1.21564], [13010.1], [13010.1], nothing]
Epoch 18901: loss: Any[[1.0], [1.21225], [13009.8], [13009.9], nothing]
Epoch 19001: loss: Any[[1.0], [1.20885], [13009.6], [13009.7], no

NeuralDensityEstimator(Session(Ptr{Void} @0x00007f4998ab07b0), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [16]:
using Plots

using MacroTools
"""
    @plot (x, [y1, y2, ...], args...)

Expand to `plot(x, [y1 y2 ...], args..., labels = [repr(y1) repr(y2) ...])`, so
that each series is labelled with the `repr` of the expression that produced it.
Entries of `args` written as `key = value` are passed on as keyword arguments.

Throws an `ArgumentError` at expansion time if the argument does not have the
form `(x, [ys...], args...)`.
"""
macro plot(ex)
    # `@capture` returns false (and binds nothing useful) when the pattern does not match.
    @capture(ex, (x_, [ys__], tail__)) ||
        throw(ArgumentError("@plot expects (x, [y1, y2, ...], args...), got: $ex"))

    labels = repr.(ys)
    # `[y1 y2 ...]` is an :hcat expression: one column per series. The labels use
    # the same row layout so that there is one label per series.
    ys_expr = Expr(:hcat, ys...)
    labels_expr = Expr(:hcat, labels...)

    # Inside a tuple `k = v` parses as an assignment; in a call it must be a :kw node.
    call_args = map(tail) do arg
        (arg isa Expr && arg.head == :(=)) ? Expr(:kw, arg.args...) : arg
    end

    call = Expr(:call, :plot, x, ys_expr, call_args..., Expr(:kw, :labels, labels_expr))
    # Escape so `x`, the `ys` and `plot` resolve in the caller's scope.
    return esc(call)
end


# Evaluate the fitted density on a grid with step 0.001 over the dataset's support and plot it.
X=first(support(dataset)) : 0.001 : last(support(dataset))
plot(X, pdf.(est,X), xlims= support(dataset), ylims=(0,1))

In [None]:
# Histogram of the data over the same x-range, for comparison with the estimated density.
histogram(data, nbins=100, xlims=support(dataset))

In [None]:
# Second experiment: a single hidden layer of 1024 units with support bounds -50 and 50.
# NOTE(review): unlike the first experiment, condition! is not called before fit!
# here, and the bounds are hard-coded rather than taken from support(dataset) —
# confirm both are intentional.
dataset = GenerateDatasets.magdon_ismail_and_atiya
data = dataset()
est = NeuralDensityEstimator([1024], -50,50)
fit!(est, data; epochs=10_000)

In [None]:

# Plot the fitted density on a grid with step 0.001 from 0 to 16.
# NOTE(review): the grid is hard-coded while xlims uses support(dataset) —
# confirm the intended plotting range.
X=-.0 : 0.001 : 16.0
plot(X, pdf.(est,X), xlims= support(dataset), ylims=(0,1))

In [None]:
# Histogram with the x-axis limited to (-20, 20) and the y-axis to (0, 1).
# NOTE(review): `X` is the plotting grid from the previous cell; the earlier
# histogram cell passes only `data` — confirm that `X` is meant to be passed here.
histogram(X, data, xlims=(-20,20), ylims=(0,1))

In [None]:
# Smallest value in the data.
minimum(data)

In [None]:
# Show the source of the method that `dataset()` dispatches to.
@less dataset()