## Intro

This is inspired by 
Article (likas2001probability) Likas, A. Probability density estimation using artificial neural networks Computer physics communications, Elsevier, 2001, 135, 167-175

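But rather than estimating the density directly with a network, we will instead work with the network's derivative.
This will let us replace their integration with a simple difference of the network's values at the boundary of the support; the only extra cost is taking a derivative of the network.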

Note that this method only works for compact supports



In their method the PDF is given by $$p_h(x,p) = \dfrac{h(x,p)}{\int_S h(z,p) dz}$$
where, in their case, $h=N(x,p)$ is a neural network with weight and bias parameters $p$,
and $S$ is a compact support. (In particular, that means it is bounded.)


But if instead we say $h=\frac{\partial N(x,p)}{\partial x}$,

then $$p_h(x,p) = \dfrac{h(x,p)}{\int_S h(z,p) dz}=\dfrac{\frac{\partial N(x,p)}{\partial x}}{N(\max(S),p) - N(\min(S), p)}$$

The denominator is of course more complex for non-1D values of S.


The loss function given is the negative log-likelihood of the set of training samples $X$
$$L(p) = -\sum_{\forall x \in X} \ln(h(x,p))  + |X| \ln\left(\int_S h(z,p) dz\right)$$

Which becomes:

$$L(p) = -\sum_{\forall x \in X} \ln\left(\frac{\partial N(x,p)}{\partial x}\right)  + |X| \ln\left(N(\max(S),p)-N(\min(S),p)\right)$$

In [2]:
using StatsBase
using Distributions

In [3]:
using TensorFlow
using MLDataUtils

In [4]:
using DensityEstimationML

In [5]:
"""
    NeuralDensityEstimator

Bundles a TensorFlow session with the graph nodes that the rest of this file
needs in order to train the network and to query the estimated density.
"""
immutable NeuralDensityEstimator
    sess::Session  # session every `run` call on this estimator goes through
    
    #Network nodes
    optimizer::Tensor    # training step; run once per epoch by `fit!`
    conditioner::Tensor  # conditioning step; run once per iteration by `condition!`
    t::Tensor            # input node; fed the observations (`fit!`) or a query point (`pdf`)
    pdf::Tensor          # density node; fetched by `Distributions.pdf`
end

In [6]:
"""
    pdf(est::NeuralDensityEstimator, t::Real)

Evaluate the estimated probability density at the single point `t`.

`t` is wrapped into a 1x1 array, fed to the estimator's input node `est.t`, and
the first element of the value fetched from the `est.pdf` node is returned.
"""
function Distributions.pdf(est::NeuralDensityEstimator, t::Real)
    # The input node is fed a 2-D array, so wrap the scalar as 1x1.
    query = reshape([t], (1, 1))
    density = run(est.sess, est.pdf, Dict(est.t => query))
    return density[1]
end

In [7]:
"""
    fit!(estimator::NeuralDensityEstimator, observations; epochs = 20)

Train `estimator` on `observations` and return the (same) estimator.

Each of the `epochs` steps feeds the transposed `observations` to the input node
`estimator.t` and runs the optimiser node together with the graph nodes named
`ysmin`, `ysmax`, `true_loss` and `working_loss`. The fetched values are printed
on epochs 1, 101, 201, ...; the optimiser's entry in that list prints as `nothing`.

# Keywords
- `epochs`: number of optimiser steps to run.
"""
function StatsBase.fit!(estimator::NeuralDensityEstimator, observations;
    epochs = 20)

    gr = estimator.sess.graph
    # Loop-invariant: resolve the graph nodes and build the feed once rather
    # than repeating the lookups and the transpose on every epoch.
    fetches = [gr["ysmin"], gr["ysmax"], gr["true_loss"], gr["working_loss"],
        estimator.optimizer]
    feed = Dict(estimator.t => observations')

    for ii in 1:epochs
        loss_o = run(estimator.sess, fetches, feed)
        if ii % 100 == 1
            println("Epoch $ii: loss: $(loss_o)")
        end
    end
    return estimator
end

In [8]:
"""
    NeuralDensityEstimator(prob_layer_sizes, support)

Build the TensorFlow graph for a 1-D neural density estimator, initialise its
variables in a fresh session and return the wrapping `NeuralDensityEstimator`.

# Arguments
- `prob_layer_sizes`: sizes of the layers between the single input and the
  single output unit (a `1` is prepended and appended to form `layer_sizes`).
- `support`: collection whose `minimum` and `maximum` are used as the two ends
  of the support.

The local variable names inside the `@tf` block matter: `fit!` and `condition!`
look up `ysmin`, `ysmax`, `true_loss`, `working_loss` and `condition_loss` by
name in the graph, so presumably `@tf` names each node after the variable it is
assigned to. Do not rename them without updating those lookups.
"""
function NeuralDensityEstimator(prob_layer_sizes, support)
    sess = Session(Graph())
    @tf begin
        # Input node, declared as Float32 with shape [1, -1].
        t = placeholder(Float32, shape=[1, -1])
        # 1x1 constants holding the lower and upper end of the support.
        # NOTE(review): these are built from `support` without conversion, so
        # their element type follows `support`, not the Float32 of `t` — confirm
        # this mix is intended.
        smin = constant(reshape([minimum(support)],(1,1)))
        smax = constant(reshape([maximum(support)],(1,1)))
        
        layer_sizes= [1; prob_layer_sizes; 1]
        
        # network_fun_stack[k] is the function computing the network up to and
        # including layer k; entry 1 is the identity (the raw input).
        network_fun_stack = Function[Base.identity]       
       
        for ii in 2:length(layer_sizes)
            below_size = layer_sizes[ii-1]
            above_size = layer_sizes[ii]
                       
            Wii = get_variable("W_$ii", [above_size, below_size], Float32)
            # The layer uses W multiplied with itself rather than W directly
            # (presumably elementwise, which would keep every effective weight
            # non-negative and hence the network non-decreasing in its input —
            # TODO confirm `Ops.mul` semantics).
            Wii2  = Ops.mul(Wii, Wii; name = "W_$(ii)_squared")
            # Hidden layers: sigmoid with a bias. Final layer: exp, no bias.
            act_fun = if ii!=length(layer_sizes)
                bii = get_variable("b_$ii", [above_size, 1], Float32)
                z -> nn.sigmoid(Wii2*z .+ bii)
            else
                z-> exp(Wii2*z)
            end
            # Compose this layer on top of everything below it.
            push!(network_fun_stack, z->act_fun(network_fun_stack[ii-1](z)))
        end
        
        network = network_fun_stack[end]

        
        # Network output at the two ends of the support and at the inputs.
        # The identity wrappers give the outputs their own assigned names,
        # which `fit!` and `condition!` fetch from the graph.
        ysmin = TensorFlow.identity(network(smin))
        ysmax = TensorFlow.identity(network(smax))
        yt = network(t)
        
        # pdf = (d network / d t) / (network(max S) - network(min S))
        denominator = (ysmax-ysmin) #area
        numerator = gradients(yt,t)
        pdf =numerator/denominator
        
        
        # Negative log-likelihood: -sum(log h(x)) + |X| * log(area).
        n_points = TensorFlow.shape(t)[2]
        true_loss= -reduce_sum(log(numerator))+ n_points.*log(denominator)
        
        # Extra penalty on the squared deviation of the area from 1, added to
        # the optimised loss with weight 0.1.
        area_loss = (1f0.-denominator)^2
        working_loss = TensorFlow.identity(true_loss) + 0.1*area_loss
        
        optimizer = train.minimize(train.AdamOptimizer(), working_loss)
        
        
        # Conditioning
        # Make sure that ysmin~=1, and ysmax~=2
        condition_loss = (1f0 - ysmin)^2 + (2f0 - ysmax)^2
        # Separate Adam instance (explicitly named) used only by `condition!`.
        condition_optimiser = train.minimize(train.AdamOptimizer(;name="adam_cond"), condition_loss)
    end
    
    run(sess, global_variables_initializer())
    
    # Field order: sess, optimizer, conditioner, t, pdf.
    NeuralDensityEstimator(sess, optimizer, condition_optimiser, t, pdf)
end

NeuralDensityEstimator

In [9]:
"""
    condition!(est::NeuralDensityEstimator, tol = 1e-15, max_epochs = 2_000)

"Conditions" the neural density estimate so the support extrema are mapped to
approximately 1 and 2.
This improves training by adjusting the area the network has to learn over.

# Arguments
- `est`: estimator whose conditioning optimiser is run; its session's variables
  are updated in place.
- `tol`: stop as soon as the conditioning loss drops below this value.
- `max_epochs`: upper bound on the number of conditioning steps.

The current `ysmin`, `ysmax` and conditioning loss are shown on iterations
1, 51, 101, ... Returns `nothing`.
"""
function condition!(est::NeuralDensityEstimator, tol = 1e-15, max_epochs = 2_000)
    gr = est.sess.graph
    # Loop-invariant list of nodes to run/fetch on every step.
    fetches = [est.conditioner, gr["ysmin"], gr["ysmax"], gr["condition_loss"]]
    # Honour the caller-supplied limits instead of hard-coded copies of the defaults.
    for ii in 1:max_epochs
        _, ysmin, ysmax, condition_loss = run(est.sess, fetches)
        ii % 50 == 1 && @show (ii, ysmin, ysmax, condition_loss)
        condition_loss[1] < tol && break
    end
    return nothing
end


condition!

In [26]:
# First experiment: draw a training sample from the `likas_1` dataset and build
# an estimator with layer sizes [64, 64] over the dataset's approximate support.
# `dataset`, `data` and `est` are reused by the cells below.
dataset =GenerateDatasets.likas_1
data = rand(dataset)
est = NeuralDensityEstimator([64,64], approximate_support(dataset))


2017-09-12 14:04:36.228905: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX TITAN X, pci bus id: 0000:01:00.0)


NeuralDensityEstimator(Session(Ptr{Void} @0x00007f236c9cbab0), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [27]:
# Run the conditioning step on the freshly initialised network first, then
# train on the sample for 20,000 epochs.
condition!(est)
println("Conditioning Done")
fit!(est, data; epochs=20_000)

(ii, ysmin, ysmax, condition_loss) = (1, [1.00402], [1.00402], [0.992])
(ii, ysmin, ysmax, condition_loss) = (51, [1.1733], [1.17375], [0.712728])
(ii, ysmin, ysmax, condition_loss) = (101, [1.51702], [1.54638], [0.473081])
(ii, ysmin, ysmax, condition_loss) = (151, [1.49823], [1.6167], [0.395151])
(ii, ysmin, ysmax, condition_loss) = (201, [1.46303], [1.72053], [0.292506])
(ii, ysmin, ysmax, condition_loss) = (251, [1.42263], [1.79774], [0.219525])
(ii, ysmin, ysmax, condition_loss) = (301, [1.38938], [1.84115], [0.176849])
(ii, ysmin, ysmax, condition_loss) = (351, [1.36478], [1.86684], [0.150796])
(ii, ysmin, ysmax, condition_loss) = (401, [1.34579], [1.88399], [0.133031])
(ii, ysmin, ysmax, condition_loss) = (451, [1.33025], [1.8966], [0.119753])
(ii, ysmin, ysmax, condition_loss) = (501, [1.3169], [1.90654], [0.109162])
(ii, ysmin, ysmax, condition_loss) = (551, [1.30504], [1.91475], [0.100317])
(ii, ysmin, ysmax, condition_loss) = (601, [1.29421], [1.92177], [0.0926799])
(ii, ysm

Epoch 6801: loss: Any[[1.01623], [6.67737], [10724.7], [10726.9], nothing]
Epoch 6901: loss: Any[[1.01508], [6.57163], [10711.4], [10713.5], nothing]
Epoch 7001: loss: Any[[1.014], [6.47988], [10703.1], [10705.1], nothing]
Epoch 7101: loss: Any[[1.01297], [6.39386], [10697.0], [10699.0], nothing]
Epoch 7201: loss: Any[[1.012], [6.30812], [10692.1], [10694.0], nothing]
Epoch 7301: loss: Any[[1.01109], [6.2217], [10688.0], [10689.8], nothing]
Epoch 7401: loss: Any[[1.01024], [6.13423], [10684.4], [10686.1], nothing]
Epoch 7501: loss: Any[[1.00944], [6.04438], [10681.2], [10682.8], nothing]
Epoch 7601: loss: Any[[1.0087], [5.95287], [10678.3], [10679.9], nothing]
Epoch 7701: loss: Any[[1.00802], [5.85968], [10675.6], [10677.1], nothing]
Epoch 7801: loss: Any[[1.00738], [5.76407], [10673.2], [10674.6], nothing]
Epoch 7901: loss: Any[[1.00679], [5.6647], [10670.8], [10672.2], nothing]
Epoch 8001: loss: Any[[1.00624], [5.56261], [10668.7], [10669.9], nothing]
Epoch 8101: loss: Any[[1.00573],

Epoch 17701: loss: Any[[1.00001], [2.23154], [10561.6], [10561.6], nothing]
Epoch 17801: loss: Any[[1.00001], [2.22644], [10561.4], [10561.4], nothing]
Epoch 17901: loss: Any[[1.00001], [2.22152], [10561.3], [10561.3], nothing]
Epoch 18001: loss: Any[[1.00001], [2.21679], [10561.1], [10561.1], nothing]
Epoch 18101: loss: Any[[1.00001], [2.21232], [10560.9], [10560.9], nothing]
Epoch 18201: loss: Any[[1.00001], [2.20797], [10560.7], [10560.8], nothing]
Epoch 18301: loss: Any[[1.00001], [2.20388], [10560.6], [10560.6], nothing]
Epoch 18401: loss: Any[[1.00001], [2.19991], [10560.4], [10560.4], nothing]
Epoch 18501: loss: Any[[1.00001], [2.19607], [10560.3], [10560.3], nothing]
Epoch 18601: loss: Any[[1.00001], [2.1924], [10560.1], [10560.1], nothing]
Epoch 18701: loss: Any[[1.00001], [2.18875], [10560.0], [10560.0], nothing]
Epoch 18801: loss: Any[[1.00001], [2.18527], [10559.8], [10559.8], nothing]
Epoch 18901: loss: Any[[1.00001], [2.18189], [10559.7], [10559.7], nothing]
Epoch 19001: 

NeuralDensityEstimator(Session(Ptr{Void} @0x00007f236c9cbab0), <Tensor Group:1 shape=unknown dtype=Any>, <Tensor Group_2:1 shape=unknown dtype=Any>, <Tensor t:1 shape=(1, ?) dtype=Float32>, <Tensor pdf:1 shape=unknown dtype=Float64>)

In [28]:
using Plots

# Evaluate the fitted density on a fine grid spanning the dataset's approximate
# support. The support is computed once so that the grid and the axis limits
# are guaranteed to come from the same value.
support_bounds = approximate_support(dataset)
X = minimum(support_bounds):0.001:maximum(support_bounds)
plot(X, pdf.(est, X), xlims=support_bounds, ylims=(0, 1))

In [30]:
# Histogram of the raw training sample (presumably for visual comparison with
# the fitted density plotted above).
histogram(data, nbins=100)

In [None]:
# Second experiment: a single layer of 1024 units on the
# `magdon_ismail_and_atiya` dataset, trained for 10,000 epochs.
dataset = GenerateDatasets.magdon_ismail_and_atiya
data = dataset()
# The constructor takes the support as ONE collection (it calls `minimum` and
# `maximum` on it), so the bounds are passed as a tuple; passing them as two
# separate positional arguments matches no constructor method.
# Floats are used so the support constants are floating point.
est = NeuralDensityEstimator([1024], (-50.0, 50.0))
fit!(est, data; epochs=10_000)

In [None]:

# Plot the fitted density on a grid over [0, 16] in steps of 0.001.
# NOTE(review): the earlier plotting cell takes its axis limits from
# `approximate_support(dataset)` while this one uses `support(dataset)`, and the
# grid here is hard-coded — confirm which is intended for this dataset.
X=-.0 : 0.001 : 16.0
plot(X, pdf.(est,X), xlims= support(dataset), ylims=(0,1))

In [None]:
# Histogram of the sample on fixed axes. The keyword was previously written as
# `xlims= xlims=(-20,20)`, which also assigned a stray global named `xlims`.
# NOTE(review): `X` (the plotting grid) is passed alongside `data` here, unlike
# the earlier `histogram(data, nbins=100)` call — confirm that is intended.
histogram(X, data, xlims=(-20, 20), ylims=(0, 1))

In [None]:
# Smallest value in the current sample (quick sanity check of its range).
minimum(data)

In [None]:
# Page through the source of the method that `dataset()` dispatches to.
@less dataset()