In [363]:
import math
import numpy as np

In [364]:
def Estep(X, mixture_weights, means, cov_mat, p, G):
    # multivariate gaussian PDF
    probs = np.zeros((X.shape[0], G)) # n x g shape
    for k, mu in enumerate(means): # exponent term
        probs[:,k] = np.sum((np.linalg.inv(cov_mat) @ (X - mu).T) * (X - mu).T, axis=0)
    probs = np.exp(probs * -0.5)
    probs /= math.sqrt(np.linalg.det(cov_mat) * (2 * math.pi) ** p) 

    probs *= mixture_weights

    return (probs / probs.sum(axis=1, keepdims=True)) + 1e-10 # normalizing

In [365]:
def Mstep(X, probs, p, G):
    mixture_weights = np.mean(probs, axis=0)
    means = np.zeros((G, p))
    cov_mat = np.zeros((p, p))
    for k in range(G):
        means[k] = np.sum((X.T * r[:,k]).T, 0) / sum(r[:,k])
        tmp = X - means[k]
        cov_mat_k = tmp.T @ np.diag(probs[:,k]) @ tmp
        cov_mat_k /= np.sum(probs[:,k])
        cov_mat += cov_mat_k
    return mixture_weights, means.T, cov_mat

In [366]:
faithful = open('faithful.dat')
temp = []
for row in faithful:
    temp.append(row.split()[1:])
temp = temp[1:]
for i in range(len(temp)):
    for j in range(len(temp[i])):
        temp[i][j] = float(temp[i][j])
faithful = np.array(temp)

In [371]:
## Testing G = 2
n = len(faithful)
X = faithful
p = 2
G = 2

p1 = 10/n
p2 = 1 - p1
mixture_weights = [p1, p2]

mu1 = X[:10].mean(0)
mu2 = X[10:].mean(0)
means = np.array([mu1, mu2])

cov_mat_L = sum([(X[l] - mu1).reshape((2, 1)) @ (X[l] - mu1).reshape((1, 2)) for l in range(10)])
cov_mat_R = sum([(X[l] - mu1).reshape((2, 1)) @ (X[l] - mu1).reshape((1, 2)) for l in range(10, len(X))])
cov_mat = (cov_mat_L + cov_mat_R) / n

for i in range(5):
    r = Estep(X, mixture_weights, means, cov_mat, p, G)
    mixture_weights, means, cov_mat = Mstep(X, r, p, G)
    print(mixture_weights)


[0.03602766 0.96397234]
[1.e+00 1.e-10]
[1.e+00 1.e-10]
[1.e+00 1.e-10]
[1.e+00 1.e-10]


In [373]:
probs = np.zeros((X.shape[0], G)) # n x g shape
for k, mu in enumerate(means): # exponent term
    probs[:,k] = np.sum((np.linalg.inv(cov_mat) @ (X - mu).T) * (X - mu).T, axis=0)
probs = np.exp(probs * -0.5)
probs /= math.sqrt(np.linalg.det(cov_mat) * (2 * math.pi) ** p) 
print(probs)

[[6.38217135e-20 0.00000000e+00]
 [1.28691207e-17 0.00000000e+00]
 [6.05503738e-19 0.00000000e+00]
 [9.82416634e-19 0.00000000e+00]
 [3.37584137e-18 0.00000000e+00]
 [3.33173026e-13 0.00000000e+00]
 [9.14291685e-19 0.00000000e+00]
 [8.01319761e-23 0.00000000e+00]
 [1.15327637e-15 0.00000000e+00]
 [4.81067132e-19 0.00000000e+00]
 [1.86360065e-17 0.00000000e+00]
 [1.09546667e-20 0.00000000e+00]
 [1.11531586e-16 0.00000000e+00]
 [5.47885699e-15 0.00000000e+00]
 [1.31545305e-16 0.00000000e+00]
 [4.36640490e-15 0.00000000e+00]
 [1.59774451e-21 0.00000000e+00]
 [1.34366779e-16 0.00000000e+00]
 [9.70578429e-18 0.00000000e+00]
 [7.02579907e-17 0.00000000e+00]
 [2.33879312e-16 0.00000000e+00]
 [5.47885699e-15 0.00000000e+00]
 [3.32271079e-20 0.00000000e+00]
 [5.29168708e-18 0.00000000e+00]
 [9.69261166e-14 0.00000000e+00]
 [7.86825123e-22 0.00000000e+00]
 [3.10648772e-17 0.00000000e+00]
 [2.34367526e-16 0.00000000e+00]
 [2.91132285e-18 0.00000000e+00]
 [4.31941610e-16 0.00000000e+00]
 [2.879303