In [1]:
import numpy as np

From Section 9 of http://ai.stanford.edu/~jduchi/projects/general_notes.pdf :


$D_{KL}\Big(P_1 \parallel P_2 \Big) = \{\dots\} = \frac{1}{2}\Big( \log \frac{\det\Sigma_2}{\det\Sigma_1} - n + tr{\Sigma^{-1}_2\Sigma_1}  + (\mu_2 - \mu_1) \Sigma^{-1}_2 (\mu_2 - \mu_1) \Big)$


$\log{\frac{\det\Sigma_2}{\det\Sigma_1}} = \sum_{i=1}^n{\big[\log{\sigma_{2i} - \log{\sigma_{1i}}} \big]}$

$\Sigma_2^{-1} =  diag{(\frac{1}{\sigma_{21}}, \dots, \frac{1}{\sigma_{2n}})}$

$ tr{\Sigma^{-1}_2\Sigma_1}  = \sum_{i=1}^n{\frac{\sigma_{1i}}{\sigma_{2i}}}$

$(\mu_2 - \mu_1) \Sigma^{-1}_2 (\mu_2 - \mu_1) = \sum_{i=1}^n{\frac{(\mu_{2i}-\mu_{1i})^2}{\sigma_{2i}}}$


In [80]:
def gaussian_kl_divergence(mu1, ln_var1, mu2, ln_var2): 
    n = mu1.shape[0]
    
    log_var_diff = ln_var2 - ln_var1
    
    var_diff_trace = np.sum(np.exp(log_var_diff))
    
    mudiff = np.sum(np.square(mu1-mu2) / np.exp(ln_var2))
    
    KL_sum = 0.5*(np.sum(log_var_diff) - n + var_diff_trace + mudiff)
    
    return KL_sum

#### Sanity checks

$ p_1(x) \sim \mathcal{N}(0, 1)$

$ p_2(x) \sim \mathcal{N}(10, 1)$

$ p_3(x) \sim \mathcal{N}(5, 10)$

In [123]:
n = 10
mu1 = np.zeros(n)
ln_var1 = np.ones(n)
mu2 = np.zeros(n)+10
ln_var2 = np.ones(n)
mu3 = np.zeros(n)+5
ln_var3 = np.ones(n)*np.log(10)

In [86]:
ln_var1.shape

(10,)

$D_{KL}\Big(P_1 \parallel P_1 \Big) $ = 0

In [110]:
gaussian_kl_divergence(mu1, ln_var1, mu1, ln_var1)

0.0

$D_{KL}\Big(P_1 \parallel P_2 \Big)  > D_{KL}\Big(P_1 \parallel P_3 \Big)$

In [113]:
gaussian_kl_divergence(mu1, ln_var1, mu2, ln_var2)

183.93972058572115

In [124]:
gaussian_kl_divergence(mu1, ln_var1, mu3, ln_var3)

32.406897523542348