Skip to content
This repository has been archived by the owner on Nov 4, 2021. It is now read-only.

Commit

Permalink
Merge pull request #11 from theogf/fix_definitions
Browse files Browse the repository at this point in the history
Add more docs and tests
  • Loading branch information
theogf committed Jul 23, 2021
2 parents f50b033 + 7b67ce2 commit bcb3de7
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 19 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "KLDivergences"
uuid = "3c9cd921-3d3f-41e2-830c-e020174918cc"
authors = ["Theo Galy-Fajou <theo.galyfajou@gmail.com> and contributors"]
version = "0.1.3"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand Down
12 changes: 9 additions & 3 deletions src/KLDivergences.jl
Original file line number Diff line number Diff line change
using SpecialFunctions
using StatsBase: StatsBase, kldivergence


export KL, kldivergence, symmetricKL

"""
KL(p::Distribution, q::Distribution) -> T
Return the KL divergence of KL(p||q), either by sampling or analytically
"""
KL

# Overloading StatsBase.kldivergence for types we do not own is type piracy... Bad! Bad! Bad!
# We restrict it to Univariate/Multivariate pairs instead of all Sampleables to limit the damage.
# See : https://github.com/JuliaStats/Distributions.jl/blob/master/src/functionals.jl#L32
StatsBase.kldivergence(p::UnivariateDistribution, q::UnivariateDistribution) = KL(p, q)
StatsBase.kldivergence(p::MultivariateDistribution, q::MultivariateDistribution) = KL(p, q)

"""
    KLbase(p, q, x)

Pointwise log-density ratio `log p(x) - log q(x)`, the Monte-Carlo integrand
used to estimate `KL(p||q)` from samples `x ~ p`.
"""
function KLbase(p, q, x)
    # We assume that p(x) > 0 since x is sampled from p
    return logpdf(p, x) - logpdf(q, x)
end

## Generic fallback for multivariate Distributions
function KL(p::UnivariateDistribution, q::UnivariateDistribution, n_samples = 1_000)
Expand Down
7 changes: 4 additions & 3 deletions src/multivariate.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
function KL(p::AbstractMvNormal, q::AbstractMvNormal)
    # Generic implementation for AbstractMvNormal relying only on mean/cov;
    # specialize for your own type if a cheaper closed form exists.
    length(p) == length(q) ||
        throw(DimensionMismatch("Distributions p and q have different dimensions $(length(p)) and $(length(q))"))
    Σp, Σq = cov(p), cov(q)
    Δμ = mean(p) - mean(q)
    # KL(N(μp,Σp) ‖ N(μq,Σq)) = ½ (tr(Σq⁻¹ Σp) + Δμᵀ Σq⁻¹ Δμ - d + logdet Σq - logdet Σp)
    return 0.5 * (tr(Σq \ Σp) + dot(Δμ, Σq \ Δμ) - length(p) + logdet(Σq) - logdet(Σp))
end

function KL(p::MvNormal, q::MvNormal)
# We use p.Σ and q.Σ to take the advantage that they are defined as PDMats objects
length(p) == length(q) ||
throw(DimensionMismatch("Distributions p and q have different dimensions $(length(p)) and $(length(q))"))
0.5 * (tr(q.Σ \ p.Σ) + invquad(q.Σ, mean(p) - mean(q)) - length(p) + logdet(q.Σ) - logdet(p.Σ))
Expand Down
60 changes: 49 additions & 11 deletions src/univariate.jl
Original file line number Diff line number Diff line change
@@ -1,30 +1,68 @@
"""
    KL(p::Beta, q::Beta)

Analytic KL divergence between two Beta distributions.
See [KL Beta](https://en.wikipedia.org/wiki/Beta_distribution#Quantities_of_information_(entropy))
"""
function KL(p::Beta, q::Beta)
    αp, βp = params(p)
    αq, βq = params(q)
    return logbeta(αq, βq) - logbeta(αp, βp) + (αp - αq) * digamma(αp) +
           (βp - βq) * digamma(βp) + (αq - αp + βq - βp) * digamma(αp + βp)
end

"""
    KL(p::Exponential, q::Exponential)

Analytic KL divergence between two Exponential distributions.
See [KL Exponential](https://en.wikipedia.org/wiki/Exponential_distribution#Kullback%E2%80%93Leibler_divergence)
"""
function KL(p::Exponential, q::Exponential)
    # The closed form below is written in the *rate* parametrization λ = 1/θ:
    #   KL(p‖q) = log(λp) - log(λq) + λq/λp - 1
    # Using scale(p)/scale(q) here would silently compute KL(q‖p) instead.
    λp = rate(p)
    λq = rate(q)
    return log(λp) - log(λq) + λq / λp - 1
end

"""
    KL(p::Gamma, q::Gamma)

Analytic KL divergence between two Gamma distributions.
See [KL Gamma](https://en.wikipedia.org/wiki/Gamma_distribution#Kullback%E2%80%93Leibler_divergence)
"""
function KL(p::Gamma, q::Gamma)
    # We use the parametrization with the rate β (Distributions stores the scale θ = 1/β)
    αp, αq = shape.((p, q))
    βp, βq = rate.((p, q))
    return (αp - αq) * digamma(αp) - loggamma(αp) + loggamma(αq) +
           αq * (log(βp) - log(βq)) + αp * (βq - βp) / βp
end

"""
    KL(p::InverseGamma, q::InverseGamma)

Analytic KL divergence between two Inverse-Gamma distributions.
See [KL Inverse-Gamma](https://en.wikipedia.org/wiki/Inverse-gamma_distribution#Properties)
"""
function KL(p::InverseGamma, q::InverseGamma)
    # KL is invariant under the bijection x ↦ 1/x, so we can reuse the
    # Gamma closed form on the transformed distributions.
    return KL(Gamma(shape(p), rate(p)), Gamma(shape(q), rate(q)))
end

"""
    KL(p::Normal, q::Normal)

Analytic KL divergence between two univariate Gaussians.
See [KL Gaussian](https://en.wikipedia.org/wiki/Normal_distribution#Other_properties)
"""
function KL(p::Normal, q::Normal)
    μp, σp = params(p)
    μq, σq = params(q)
    # ½ [(σp/σq)² + ((μp-μq)/σq)² - 1] + log(σq/σp)
    return 0.5 * (abs2(σp / σq) + abs2((μp - μq) / σq) - 1 + 2 * (log(σq) - log(σp)))
end

"""
    KL(p::Poisson, q::Poisson)

Analytic KL divergence between two Poisson distributions:
`λq - λp + λp log(λp/λq)`.
See [KL Poisson](https://en.wikipedia.org/wiki/Poisson_distribution#Other_properties)
"""
function KL(p::Poisson, q::Poisson)
    λp, λq = rate.((p, q))
    return λq - λp + λp * (log(λp) - log(λq))
end


Expand Down
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
22 changes: 21 additions & 1 deletion test/multivariate.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,29 @@
# NOTE(review): this testset lives in test/multivariate.jl but was labeled
# "univariate"; relabeled to match its contents.
@testset "multivariate" begin
    # Minimal AbstractMvNormal implementation parametrized by a Cholesky factor L,
    # used to exercise the generic AbstractMvNormal fallback of KL.
    struct CholeskyMvNormal{TL,Tm} <: Distributions.AbstractMvNormal
        m::Tm
        L::TL
    end
    Distributions.mean(p::CholeskyMvNormal) = p.m
    Distributions.cov(p::CholeskyMvNormal) = p.L * p.L'
    Distributions.rand(p::CholeskyMvNormal, n::Int) = p.m .+ p.L * randn(length(p), n)
    Distributions.length(p::CholeskyMvNormal) = length(p.m)
    function Distributions.logpdf(p::CholeskyMvNormal, x::AbstractVector)
        return -0.5 * (length(p) * log(2π) + 2 * logdet(p.L) + sum(abs2, p.L \ (x .- p.m)))
    end
    @testset "AbstractMvNormal" begin
        n_dim = 2
        X1 = cholesky(Matrix(0.5 * I(n_dim))).L
        X2 = cholesky(Matrix(0.3 * I(n_dim))).L
        p = CholeskyMvNormal(zeros(n_dim), X1)
        q = CholeskyMvNormal(ones(n_dim), X2)
        @test KL(p, q) > 0
        # Analytic value should agree with the Monte-Carlo estimate
        @test KL(p, q) ≈ KL(p, q, 100_000) atol = 0.1
    end
    @testset "MvNormal" begin
        n_dim = 2
        p = MvNormal(zeros(n_dim), Matrix(0.5 * I(n_dim)))
        q = MvNormal(ones(n_dim), Matrix(0.3 * I(n_dim)))
        @test KL(p, q) > 0
        @test KL(p, q) ≈ KL(p, q, 100_000) atol = 0.1
    end
end
10 changes: 10 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
using Distributions: kldivergence
using KLDivergences
using Distributions
using LinearAlgebra
using Random
using StatsBase
using Test

@testset "KLDivergences.jl" begin
include("univariate.jl")
include("multivariate.jl")

@testset "Generic Methods" begin
p = Exponential(2.0)
q = Exponential(5.0)
@test symmetricKL(p, q) == symmetricKL(q, p)
@test kldivergence(p, q) == KL(p, q)
end

end

2 comments on commit bcb3de7

@theogf
Copy link
Owner Author

@theogf theogf commented on bcb3de7 Jul 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/41418

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.1.3 -m "<description of version>" bcb3de7636c1505e18cfdfba0097a698a3cbf66e
git push origin v0.1.3

Please sign in to comment.