In [2]:
import numpy as np
import pandas as pd
import scipy.stats as ss

##### Реализуем EM алгоритм для двумерного нормального распределения

In [18]:
mu1 = np.array([0, 1])
sigma1 = np.eye(2)
mu2 = np.array([1, 0])
sigma2 = np.eye(2)
w1 = 0.5
w2 = 0.5

rng = np.random.default_rng(1337)

b = ss.bernoulli(0.3).rvs(size=(1000, 1), random_state=rng)
x1 = ss.multivariate_normal([0, 0], [[1, 0.5], [0.5, 1]]).rvs(
    size=1000, random_state=rng)
x2 = ss.multivariate_normal(
    [2.5, 0], [[1, -0.5], [-0.5, 1]]).rvs(size=1000, random_state=rng)
x = b * x1 + (1 - b) * x2

def E(mu1, sigma1, mu2, sigma2, w1, w2, x):
    h1 = w1*ss.multivariate_normal(mu1, sigma1).pdf(x)
    h2 = w2*ss.multivariate_normal(mu2, sigma2).pdf(x)
    return h1/(h1+h2), h2/(h1+h2)

def M(h1, h2, x):
    w1 = h1.mean()
    w2 = h2.mean()
    mu1 = np.sum(h1.reshape(-1, 1)*x, axis=0) / np.sum(h1)
    mu2 = np.sum(h2.reshape(-1, 1)*x, axis=0) / np.sum(h2)
    sigma1 = np.cov(x.T, aweights=h1/w1, bias=True)
    sigma2 = np.cov(x.T, aweights=h2/w2, bias=True)
    return mu1, sigma1, mu2, sigma2, w1, w2

E(mu1, sigma1, mu2, sigma2, w1, w2, x)

(array([5.33952247e-01, 6.74531127e-03, 4.18059885e-01, 5.41519517e-01,
        4.93113318e-01, 1.80364065e-02, 1.23121580e-01, 2.62232760e-01,
        4.58704465e-02, 4.11903142e-01, 2.40576121e-02, 1.33676041e-02,
        5.04703933e-01, 6.25616829e-02, 3.43234972e-02, 5.66435525e-01,
        9.60026307e-02, 1.67132553e-01, 2.70400462e-01, 1.71785938e-01,
        8.08596997e-01, 1.62651924e-01, 1.07533566e-01, 4.03024916e-01,
        7.03444210e-01, 7.54855386e-01, 7.42601947e-02, 3.71009436e-01,
        5.88279002e-01, 8.94845082e-01, 1.08206693e-01, 1.44814818e-01,
        8.51595003e-01, 4.27260520e-01, 4.08782502e-01, 5.89999426e-01,
        3.07154110e-02, 7.01685381e-03, 6.52637590e-03, 8.52610458e-03,
        2.65014114e-01, 4.28775840e-01, 1.61210597e-01, 1.17193538e-02,
        7.39354197e-01, 4.45026559e-02, 1.41796071e-02, 3.46200675e-02,
        2.31399781e-01, 1.14065531e-01, 4.78915460e-01, 4.90791182e-01,
        6.92671958e-01, 2.27495172e-01, 1.65529518e-01, 2.146449

In [19]:
while True:
    h1, h2 = E(mu1, sigma1, mu2, sigma2, w1, w2, x)
    new_mu1, new_sigma1, new_mu2, new_sigma2, new_w1, new_w2 = M(h1, h2, x)
    eps=1e-4
    if (
        (np.max(abs(new_mu1 - mu1)) < eps) and 
        (np.max(abs(new_mu2 - mu2))< eps) and
        (np.max(abs(new_sigma1 - sigma1)) < eps) and
        (np.max(abs(new_sigma2 - sigma2)) < eps)
    ):
        break
    mu1, mu2, sigma1, sigma2, w1, w2 = new_mu1, new_sigma1, new_mu2, new_sigma2, new_w1, new_w2
    
print(mu1, mu2, sigma1, sigma2, w1, w2)

ValueError: The input matrix must be symmetric positive semidefinite.