In [14]:
import numpy as np
import pandas as pd

In [51]:
class Gaussian():
    def __init__(self, mean_vec: np.array, second_vec: np.array):
        self.dim = mean_vec.shape[0]
        self.mean_vector = mean_vec
        self.var_vector = second_vec if second_vec.shape == (self.dim,) else None
        self.cov_matrix = second_vec if second_vec.shape == (self.dim,self.dim) else None

        if self.var_vector is None and self.cov_matrix is None:
            raise Exception(f"You must pass a {dim} vector for variance or {dim}x{dim} matrix for covariance")

    def log_likelihood(self, query_vec: np.array) -> float:
        n = self.dim
        mu = np.mean(self.mean_vector)
        sigma = np.std(self.var_vector) if self.var_vector is not None else np.std(self.cov_matrix)
        log_likelihood = -n/2 * np.log(2 * np.pi * sigma**2) - np.sum((query_vec - mu)**2) / (2 * sigma**2)

        return log_likelihood

In [56]:
## Test

dimension = 5

mean_vec = np.random.rand(dimension)
var_vec = np.random.rand(dimension)
cov_matrix = np.random.rand(dimension,dimension)
query_vec = np.random.rand(dimension)
gaussian_var = Gaussian(mean_vec, var_vec)
gaussian_cov = Gaussian(mean_vec, cov_matrix)

print(f"Mean vector {mean_vec}")
print("========================")
print(f"Variance vector {var_vec}")
print("========================")
print(f"Co-Variance matrix {cov_matrix}")
print("========================")
print(f"Query vector {query_vec}")
print("========================")
print(f"Log likelihood of {gaussian_var.log_likelihood(query_vec)} the variance vector gaussian")
print("========================")
print(f"Log likelihood of {gaussian_cov.log_likelihood(query_vec)} the co-variance matrix gaussian")

Mean vector [0.05578147 0.23516052 0.61151842 0.47321925 0.80700116]
Variance vector [0.51554923 0.90714461 0.43464092 0.00524586 0.4429306 ]
Co-Variance matrix [[0.07446039 0.72696807 0.19370874 0.49223035 0.26311188]
 [0.63448567 0.10140365 0.73072205 0.00744125 0.83642111]
 [0.31262387 0.10393035 0.29300821 0.16386473 0.65812498]
 [0.24431722 0.18085666 0.80465116 0.43957162 0.31153727]
 [0.91754705 0.44605677 0.4782039  0.28601834 0.46932868]]
Query vector [0.69122728 0.68913736 0.04080944 0.76169144 0.31384353]
Log likelihood of -0.8180899908886281 the variance vector gaussian
Log likelihood of -0.8763020766264806 the co-variance matrix gaussian


In [58]:
data1 = pd.read_csv('data_1.tsv', sep='\t', header=None).values
print(type(data1))
print(data1.shape)

<class 'numpy.ndarray'>
(320, 6)


In [59]:
data2 = pd.read_csv('data_2.tsv', sep='\t', header=None).values
print(type(data2))
print(data2.shape)

<class 'numpy.ndarray'>
(320, 6)


In [60]:
query = pd.read_csv('query.tsv', sep='\t', header=None).values
print(type(query))
print(query.shape)

<class 'numpy.ndarray'>
(7, 6)
