# Gaussian Distributions Utilities

In [None]:
#| hide
#| default_exp gaussian

## Normal Parameters

In [None]:
#| export
from collections import namedtuple
from fastcore.basics import patch

### Normal

In [None]:
import torch

In [None]:
#| export
# Parameters of several univariate normals stored together: `mean` and `std` hold one entry per distribution
ListNormal = namedtuple('ListNormal', ('mean', 'std'))

In [None]:
#| export
# Parameters of a single univariate normal distribution
Normal = namedtuple('Normal', ('mean', 'std'))

In [None]:
#| export
@patch
def __getitem__(self: ListNormal,
                n: int # position of the distribution in the list
               ) -> Normal: # distribution built from `mean[n]` and `std[n]`
    """Get the mean and std for the nth Normal distribution in the list"""
    nth_mean = self.mean[n]
    nth_std = self.std[n]
    return Normal(mean=nth_mean, std=nth_std)

In [None]:
#| export
@patch
def detach(self: ListNormal) -> ListNormal: # new list holding the result of `.detach()` on each field
    """Detach both mean and std at once"""
    mean, std = self.mean, self.std
    return ListNormal(mean=mean.detach(), std=std.detach())

In [None]:
# index a list of 10 random normals: displays the `Normal` at position 1
ListNormal(torch.rand(10), torch.rand(10))[1]

Normal(mean=tensor(0.7015), std=tensor(0.1506))

### Multivariate Normal

In [None]:
#| export
# Parameters of several multivariate normals stored together (`mean` vectors and `cov` matrices).
# The runtime type name is 'ListMultiNormal', which is what appears in the repr.
ListMNormal = namedtuple('ListMultiNormal', ('mean', 'cov'))

In [None]:
#| export
# Parameters of a single multivariate normal; the runtime type name is 'MultiNormal'
MNormal = namedtuple('MultiNormal', ('mean', 'cov'))

In [None]:
#| export
@patch
def __getitem__(self: ListMNormal,
                n: int # position of the distribution in the list
               ) -> MNormal: # distribution built from `mean[n]` and `cov[n]`
    """Get the mean and cov for the nth multivariate Normal distribution in the list"""
    return MNormal(self.mean[n], self.cov[n])

In [None]:
#| export
@patch
def detach(self: ListMNormal) -> ListMNormal: # new list holding the result of `.detach()` on each field
    """Detach both mean and cov at once"""
    mean, cov = self.mean, self.cov
    return ListMNormal(mean=mean.detach(), cov=cov.detach())

In [None]:
# index a list of 2 random multivariate normals (10 variables each): displays the one at position 1
ListMNormal(torch.rand(2,10), torch.rand(2,10,10))[1]

MultiNormal(mean=tensor([0.2230, 0.6219, 0.0614, 0.4108, 0.0269, 0.8821, 0.0538, 0.4934, 0.6936,
        0.7251]), cov=tensor([[0.6778, 0.9081, 0.1468, 0.2159, 0.9794, 0.1789, 0.1370, 0.0231, 0.2010,
         0.4429],
        [0.4284, 0.9975, 0.5105, 0.8471, 0.9617, 0.2563, 0.7764, 0.2269, 0.3718,
         0.5055],
        [0.9956, 0.4071, 0.2768, 0.2514, 0.9048, 0.5803, 0.9003, 0.5310, 0.5541,
         0.2492],
        [0.0716, 0.7444, 0.7611, 0.6266, 0.6688, 0.6224, 0.0499, 0.5091, 0.2171,
         0.2908],
        [0.8675, 0.2983, 0.2988, 0.4142, 0.8150, 0.7652, 0.0637, 0.6526, 0.8359,
         0.8151],
        [0.7582, 0.4173, 0.8226, 0.8180, 0.1584, 0.3182, 0.3375, 0.9921, 0.3909,
         0.2029],
        [0.0842, 0.0666, 0.3392, 0.4647, 0.3287, 0.3195, 0.3058, 0.8487, 0.0657,
         0.9835],
        [0.6502, 0.6589, 0.3418, 0.0751, 0.3161, 0.5982, 0.0325, 0.8544, 0.7457,
         0.4674],
        [0.5499, 0.7042, 0.1712, 0.4583, 0.4671, 0.3013, 0.6306, 0.8924, 0.9835,
        

### Conditional Predictions

This adds support for conditional predictions: at the time (t) when we are making the predictions, some of the variables may have actually been observed. Since the model prediction is a normal distribution, we can condition on the observed values and thus improve the predictions.

Therefore we need to compute the conditional distribution of a multivariate normal ^[https://cs.nyu.edu/~roweis/notes/gaussid.pdf, eq. 5a and 5d]

$$ X = \left[\begin{array}{c} x \\ o \end{array} \right] $$

$$ p(X) = N\left(\left[ \begin{array}{c} \mu_x \\ \mu_o \end{array} \right], \left[\begin{array}{cc} \Sigma_{xx} & \Sigma_{xo} \\ \Sigma_{ox} & \Sigma_{oo} \end{array} \right]\right)$$

where $x$ is the vector of variables that need to be predicted and $o$ is the vector of variables that have been observed


then the conditional distribution is:

$$p(x|o) = N(\mu_x + \Sigma_{xo}\Sigma_{oo}^{-1}(o - \mu_o), \Sigma_{xx} - \Sigma_{xo}\Sigma_{oo}^{-1}\Sigma_{ox})$$

In [None]:
#| export
import torch
from torch.distributions import MultivariateNormal
from torch.linalg import cholesky
from torch import cholesky_inverse
from torch import Tensor

from fastcore.test import *
from meteo_imp.utils import *

This is the direct implementation of the equations

In [None]:
def _conditional_guassian_base(
                         μ: Tensor, # mean with shape `[n_vars]`
                         Σ: Tensor, # cov with shape `[n_vars, n_vars]`
                         obs: Tensor, # Observations with shape `[n_obs]`, one value per `True` entry of `idx`
                         idx: Tensor # Boolean tensor specifying for each variable is observed (True) or not (False). Shape `[n_vars]`
                        ) -> ListNormal: # Distribution conditioned on observations
    """Condition a multivariate normal on the observed variables, using an explicit matrix inverse.

    The conditional covariance matrix is returned in the second field of the `ListNormal`.
    """
    observed, missing = idx, ~idx

    # split the mean into the part to predict (x) and the observed part (o)
    μ_x, μ_o = μ[missing], μ[observed]

    # select the rows of each group first, then the columns, to get the four covariance blocks
    rows_x, rows_o = Σ[missing], Σ[observed]
    Σ_xx, Σ_xo = rows_x[:, missing], rows_x[:, observed]
    Σ_ox, Σ_oo = rows_o[:, missing], rows_o[:, observed]

    # Σ_xo Σ_oo⁻¹ appears in both the mean and the covariance
    gain = Σ_xo @ torch.linalg.inv(Σ_oo)

    return ListNormal(μ_x + gain @ (obs - μ_o), Σ_xx - gain @ Σ_ox)
    

Faster version, based on the Cholesky decomposition of $\Sigma_{oo}$

In [None]:
#| export
def conditional_guassian(
                         μ: Tensor, # mean with shape `[n_vars]`
                         Σ: Tensor, # cov with shape `[n_vars, n_vars]`
                         obs: Tensor, # Observations with shape `[n_obs]`, where `n_obs = sum(idx)`
                         idx: Tensor # Boolean tensor specifying for each variable is observed (True) or not (False). Shape `[n_vars]`
                        ) -> ListNormal: # Distribution conditioned on observations. shape `[n_vars - n_obs]`
    """Condition a multivariate normal on the observed variables.

    Computes the mean `μ_x + Σ_xo Σ_oo⁻¹ (obs - μ_o)` and the covariance `Σ_xx - Σ_xo Σ_oo⁻¹ Σ_ox`,
    where `o` are the observed variables and `x` the remaining ones. The products with `Σ_oo⁻¹`
    are obtained by solving against the Cholesky factor of `Σ_oo` instead of forming the inverse.

    NOTE: the conditional covariance matrix is returned in the second field of the `ListNormal`
    (the field named `std`); it is a full covariance matrix, not a standard deviation.
    """
    n_obs = int(idx.sum())  # tensor reduction instead of iterating over the mask in Python
    assert μ.shape[0] == idx.shape[0], "`μ` and `idx` must have the same length"
    assert obs.shape[0] == n_obs, "`obs` must contain one value per observed variable"

    μ_x = μ[~idx]
    μ_o = μ[idx]

    Σ_xx = Σ[~idx,:][:, ~idx]
    Σ_xo = Σ[~idx,:][:, idx]
    Σ_ox = Σ[idx,:][:, ~idx]
    Σ_oo = Σ[idx,:][:, idx]

    L_oo = cholesky(Σ_oo)

    # Σ_oo⁻¹ (obs - μ_o): `cholesky_solve` expects a matrix right-hand side, hence the extra dim
    solved_resid = torch.cholesky_solve((obs - μ_o).unsqueeze(-1), L_oo).squeeze(-1)
    # Σ_oo⁻¹ Σ_ox
    solved_Σ_ox = torch.cholesky_solve(Σ_ox, L_oo)

    mean = μ_x + Σ_xo @ solved_resid
    cov = Σ_xx - Σ_xo @ solved_Σ_ox

    return ListNormal(mean, cov)
    

In [None]:
# example distribution with only 2 variables
μ = torch.tensor([.5, 1.])
Σ = torch.tensor([[1., .5], [.5 ,1.]])


idx = torch.tensor([True, False]) # first variable is the observed one

obs = torch.tensor([5.]) # value of the observed (first) variable, shape `[n_obs]`

gauss_cond = conditional_guassian(μ, Σ, obs, idx)
# unpack positionally: the result holds (conditional mean, conditional covariance)
cond_mean, cond_cov = gauss_cond

# hardcoded values to test that the code is working, see also for alternative implementation https://python.quantecon.org/multivariate_normal.html
# mean: 1 + .5 * (5 - .5) = 3.25, cov: 1 - .5 * .5 = .75
test_close(3.25, cond_mean.item())
test_close(.75, cond_cov.item())

## Improvements

Use `cholesky` decomposition and `cholesky_solve` to improve performance of matrix inversion

see the [Probabilistic Machine Learning course from the University of Tübingen](https://uni-tuebingen.de/en/180804), specifically the code from the [Gaussian Regression Notebook](https://uni-tuebingen.de/fileadmin/Uni_Tuebingen/Fakultaeten/MatNat/Fachbereiche/Informatik/Lehrstuehle/MethMaschLern/Probabilistic_ML/Notebook_Vorlesung_7___9/Gaussian_Linear_Regression.ipynb) for details

In [None]:
#| export
def to_posdef(A, # matrix (or batch of matrices) with shape `[..., n, m]`
              eps=1e-3 # jitter added to the diagonal
             ): # symmetric positive definite matrix with shape `[..., n, n]`
    "Build the symmetric positive definite matrix `A @ Aᵀ + eps * I`"
    # the jitter goes on the diagonal only: adding a scalar to every entry does not
    # guarantee positive definiteness (e.g. a zero `A` would give a singular matrix)
    jitter = eps * torch.eye(A.shape[-2], dtype=A.dtype, device=A.device)
    return A @ A.mT + jitter

In [None]:
# random problem reused by the cells below
n_var = 5
mean = torch.rand(n_var)
cov = to_posdef(torch.rand(n_var, n_var)) # random covariance matrix
dist = MultivariateNormal(mean, cov)
idx = torch.rand(n_var) > .5 # random boolean mask, `True` marks a variable as observed
obs = torch.rand(n_var)[idx] # random values for the observed variables only

In [None]:
# reference result: explicit inverse of the covariance
torch.linalg.inv(cov) 

In [None]:
# the inverse computed from the Cholesky factor must match the explicit inverse within `eps`
test_close(torch.linalg.inv(cov), cholesky_inverse(torch.linalg.cholesky(cov)), eps=2e-3)

In [None]:
# larger matrix used to time the two approaches
A = to_posdef(torch.rand(2000, 2000)) 

In [None]:
# timing of the explicit inverse
%timeit torch.linalg.inv(A)

In [None]:
# timing of the Cholesky based inverse
%timeit cholesky_inverse(torch.linalg.cholesky(A))

The second version is a bit faster

In [None]:
# both implementations take `(μ, Σ, obs, idx)` and must agree on the conditional mean;
# the tolerance allows for float32 round-off between the two ways of applying the inverse
test_close(conditional_guassian(mean, cov, obs, idx).mean, _conditional_guassian_base(mean, cov, obs, idx).mean, eps=2e-3)

In [None]:
B = to_posdef(torch.rand(n_var, n_var))

In [None]:
B @ torch.inverse(cov)

In [None]:
torch.cholesky_solve(cholesky(cov), B)

## Helper

### cov2std

In [None]:
x = torch.stack([torch.eye(3)*i for i in  range(1,4)]) # stack of 3 diagonal matrices: 1*I, 2*I, 3*I

In [None]:
x

In [None]:
# diagonal of each matrix in the stack
torch.diagonal(x, dim1=1, dim2=2)

In [None]:
#| export
def cov2std(x # covariance matrices with shape `[..., n_vars, n_vars]`
           ): # standard deviations with shape `[..., n_vars]`
    "convert array of covariances to array of stddev"
    # take the variances (diagonal) first: the square root of the whole matrix would also be
    # evaluated on the off-diagonal entries, which can be zero or negative and make the
    # gradient non finite even though those entries are discarded
    return torch.sqrt(torch.diagonal(x, dim1=-2, dim2=-1))

## Export

In [None]:
#| hide
from nbdev import nbdev_export
# NOTE(review): presumably writes the `#| export` cells of the notebooks to the library modules — confirm against the nbdev docs
nbdev_export()