In [1]:
import numpy as np
from scipy import stats
from scipy.stats import multivariate_normal

## univariate normal

* define a normal random variable with mean=2 and std=3 and draw an iid sample of size 100 

In [2]:
# Parameters of the univariate normal and the sample size.
mu = 2
sigma = 3
N = 100

# Fixed seed for the draw so that Restart & Run All reproduces the same sample.
DATA_SEED = 123456789

# Frozen N(mu, sigma^2) random variable; scipy's `scale` is the standard deviation.
normal_rv = stats.norm(loc=mu, scale=sigma)
# iid sample of size N, drawn reproducibly from the frozen distribution.
data = normal_rv.rvs(size=N, random_state=DATA_SEED)

* log of probability density function at data

In [3]:
# Log-density of the frozen normal evaluated at every observation in the sample.
log_pdf = normal_rv.logpdf(x=data)

* this is a vector of the log pdfs of each data point

In [4]:
# One log-density per observation.
np.shape(log_pdf)

(100,)

* because of independence, the log pdf of the joint distribution is equal to the sum of the individual log pdfs

In [5]:
# Joint log-density of the sample: the sum of the individual log-densities.
np.sum(log_pdf)

-251.3357655270548

* if instead of taking the log we compute the joint pdf itself, we have to multiply the individual pdfs

In [6]:
# Per-observation densities, then their product: the joint density of the sample.
pdf = normal_rv.pdf(x=data)
L = pdf.prod()
L

7.018817119122614e-110

* This illustrates the risk of numerical underflow: because we multiply a large number of small (but all different from zero) numbers, the value of the product is extremely small. Here it is still representable in double precision (the smallest positive normal double is about 2.2e-308), but with a larger sample the product would lose precision and eventually become indistinguishable from zero. This is why, when we work with probability densities, likelihoods, etc., we always use logs.

## multivariate normal 

* A sample of several independent univariate normal random variables is equivalent to a __single__ draw from a multivariate normal distribution with a __diagonal__ covariance matrix

In [7]:
# Joint distribution of the N iid draws: a multivariate normal with a diagonal
# covariance. The dimensions follow N (the sample size) rather than a
# hard-coded 100, so this cell stays consistent if the sample size changes.
Sigma = (sigma**2) * np.eye(N)  # (N x N) covariance matrix of the joint distribution
Mu = mu * np.ones((N, 1))  # (N x 1) mean vector of the joint distribution

* evaluate the log likelihood

In [8]:
# Gaussian log-likelihood of the whole sample, treated as one N-dimensional draw:
#   -0.5 * ( N*log(2*pi) + log|Sigma| + resid' Sigma^{-1} resid )
yvec = data.reshape(-1, 1)  # make into an (N x 1) column vector
resid = yvec - Mu

# slogdet returns (sign, log|det|) without ever forming the determinant itself,
# which would overflow (or underflow to 0) for large N -- the same numerical
# problem that motivates working with logs in the first place.
sign_Sigma, logdet_Sigma = np.linalg.slogdet(Sigma)
assert sign_Sigma > 0, "Sigma must have a positive determinant"

# Solve Sigma z = resid instead of inverting Sigma explicitly; .item() turns the
# (1 x 1) result of the quadratic form into a plain scalar.
quad_form = (resid.T @ np.linalg.solve(Sigma, resid)).item()

log_likelihood = -0.5 * (N * np.log(2 * np.pi) + logdet_Sigma + quad_form)

log_likelihood

-251.33576552705463

* using the `scipy` function

In [9]:
# Same joint log-density from scipy's multivariate normal; the mean is passed
# as a 1-D vector, so the (N x 1) column Mu is flattened first.
mean_vector = Mu.ravel()
llf = multivariate_normal.logpdf(x=data, mean=mean_vector, cov=Sigma)
llf

-251.33576552705475

In [10]:

# Seed and the NumPy Generator built from it; later draws in the notebook go
# through `rng`.
SEED = 123_456_789
rng = np.random.default_rng(seed=SEED)

In [11]:
# Settings for the correlated multivariate-normal draws below.
K = 100    # number of draws
T = 4      # dimension of each draw
RHO = 0.5  # common off-diagonal entry of the covariance matrix

In [48]:
# Equicorrelation matrix: ones on the diagonal, RHO everywhere else.
Sigma_rho = np.full((T, T), RHO, dtype=float)
np.fill_diagonal(Sigma_rho, 1.0)

# K draws from a zero-mean T-variate normal with covariance Sigma_rho -> (K, T).
# NOTE(review): this rebinds `mu`, which earlier in the notebook is the scalar
# mean (mu = 2); the earlier cells cannot be re-run after this one without
# first re-running the cell that defines the scalar.
mu = rng.multivariate_normal(mean=np.zeros(T), cov=Sigma_rho, size=K)

In [49]:
# One row per draw, one column per component.
np.shape(mu)

(100, 4)

In [50]:
# Sample covariance across draws; rowvar=False treats columns as the variables.
np.cov(m=mu, rowvar=False)

array([[0.94106146, 0.43707174, 0.39049117, 0.36901824],
       [0.43707174, 0.79948925, 0.45743763, 0.34374931],
       [0.39049117, 0.45743763, 1.00450303, 0.28189997],
       [0.36901824, 0.34374931, 0.28189997, 0.78824764]])

In [51]:
# Sample correlation across draws; rowvar=False treats columns as the variables.
np.corrcoef(x=mu, rowvar=False)

array([[1.        , 0.50389195, 0.40163042, 0.42845725],
       [0.50389195, 1.        , 0.51044616, 0.43301624],
       [0.40163042, 0.51044616, 1.        , 0.31680211],
       [0.42845725, 0.43301624, 0.31680211, 1.        ]])

In [53]:
# diagflat flattens its input, so a (2, 1) column of ones becomes the diagonal
# of a 2 x 2 matrix.
np.diagflat(np.ones(shape=(2, 1)))

array([[1., 0.],
       [0., 1.]])

In [55]:
# Flattened (2, 1) column of ones placed on the diagonal of a 2 x 2 matrix.
np.diagflat(v=np.ones((2, 1)))

array([[1., 0.],
       [0., 1.]])