In [1]:
import re
import os
import sys
# Make project code importable from this notebook: the parent of the current
# working directory and its "src" subdirectory are put at the front of sys.path.
root_path = os.path.dirname(os.getcwd())
# Slice-assign at position 0 so that "src" is searched first, then the parent directory.
sys.path[:0] = [os.path.join(root_path, "src"), root_path]

# **Multivariate Gaussian**

In [7]:
import numba
import numpy as np

def multivariate_gaussian(x, y, Hinv, Hdet):
    """Evaluate a multivariate Gaussian kernel between ``x`` and ``y``.

    Computes ``exp(-0.5 * (x - y)^T Hinv (x - y)) / sqrt((2*pi)^d * Hdet)``
    where ``d = len(x)``.

    Parameters
    ----------
    x, y : numpy arrays of matching shape
        The two points; only their difference enters the exponent.
    Hinv : 2-D numpy array
        Matrix used in the quadratic form (the caller is expected to pass the
        inverse of the kernel matrix).
    Hdet : float
        Scalar used in the normalisation (the caller is expected to pass the
        determinant of the kernel matrix).

    Returns
    -------
    The kernel value (a scalar when ``x`` and ``y`` are 1-D).
    """
    delta = x - y
    # Quadratic form delta^T * Hinv * delta.
    quadratic_form = np.dot(np.dot(delta.T, Hinv), delta)
    normalizer = np.sqrt(np.power(2 * np.pi, len(x)) * Hdet)
    return np.exp((-1 / 2) * quadratic_form) / normalizer


@numba.jit(nopython=True)
def multivariate_gaussian_numba(x, y, Hinv, Hdet):
    """Numba-compiled (nopython mode) multivariate Gaussian kernel.

    Computes ``exp(-0.5 * (x - y)^T Hinv (x - y)) / sqrt((2*pi)^d * Hdet)``
    where ``d = len(x)``.

    Parameters
    ----------
    x, y : numpy arrays of matching shape
        The two points; only their difference enters the exponent.
    Hinv : 2-D numpy array
        Matrix used in the quadratic form (the caller is expected to pass the
        inverse of the kernel matrix).
    Hdet : float
        Scalar used in the normalisation (the caller is expected to pass the
        determinant of the kernel matrix).

    Returns
    -------
    The kernel value (a scalar when ``x`` and ``y`` are 1-D).
    """
    offset = x - y
    # Quadratic form offset^T * Hinv * offset.
    mahalanobis_sq = np.dot(np.dot(offset.T, Hinv), offset)
    scale = np.sqrt(np.power(2 * np.pi, len(x)) * Hdet)
    return np.exp((-1 / 2) * mahalanobis_sq) / scale

In [8]:
# Synthetic sample: 1000 points drawn from a 2-D standard normal distribution.
# Seeding the global NumPy RNG makes the sample, and every number derived from
# it in later cells, reproducible after Restart Kernel -> Run All.
SEED = 0
np.random.seed(SEED)
array = np.random.randn(1000, 2)
kernel_size = 0.2

nrows, ncols = array.shape

# Isotropic kernel matrix: H = kernel_size**2 * I.
H = np.power(kernel_size, 2) * np.eye(ncols)
Hinv = np.linalg.pinv(H)
# Closed-form determinant; valid only in this specific case (H is a scaled identity).
Hdet = np.power(np.power(kernel_size, 2), ncols)

## **Com numba**

In [9]:
# Negative mean, over all rows, of log2 of the average kernel value between
# that row and every row of `array` (itself included), using the numba kernel.
# NOTE(review): this has the form of a Parzen-window entropy estimate in bits.
(-1 / nrows) * sum(
    np.log2(
        sum(multivariate_gaussian_numba(sample, center, Hinv, Hdet) for center in array)
        / nrows
    )
    for sample in array
)

3.9269910879057663

In [10]:
%%timeit
(-1 / nrows) * sum(
        np.log2(sum(multivariate_gaussian_numba(i, j, Hinv, Hdet) for j in array) / nrows)
        for i in array
)

842 ms ± 4.26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## **Sem numba**


In [11]:
# Negative mean, over all rows, of log2 of the average kernel value between
# that row and every row of `array` (itself included), using the pure-Python kernel.
# NOTE(review): this has the form of a Parzen-window entropy estimate in bits.
(-1 / nrows) * sum(
    np.log2(
        sum(multivariate_gaussian(sample, center, Hinv, Hdet) for center in array)
        / nrows
    )
    for sample in array
)

3.9269910879057663

In [12]:
%%timeit
(-1 / nrows) * sum(
        np.log2(sum(multivariate_gaussian(i, j, Hinv, Hdet) for j in array) / nrows)
        for i in array
)

8.58 s ± 37.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
