In [None]:
import numpy as np
from scipy.stats import multivariate_normal

In [None]:
def init_params(X, K, seed=0):
    """Initialize the parameters for the GMM.

    Parameters
    ----------
    X : ndarray of shape (n, d)
        Data matrix, one sample per row.
    K : int
        Number of mixture components. Must not exceed ``n`` because the
        initial means are sampled from the rows of ``X`` without replacement.
    seed : int, optional
        Seed of the private random generator used to pick the initial means.
        The default (0) reproduces the historical initialization.

    Returns
    -------
    mu : ndarray of shape (K, d)
        Initial means: K distinct rows of ``X``.
    Sigma : list of K ndarrays of shape (d, d)
        Initial covariances, each an identity matrix.
    pi : ndarray of shape (K,)
        Initial mixing coefficients, all equal to ``1 / K``.
    """
    n, d = X.shape

    # Use a private generator instead of np.random.seed(): reseeding the
    # global RNG here would silently reset the caller's random state.
    # RandomState(seed) yields the same draws the seeded global RNG would.
    rng = np.random.RandomState(seed)

    # Randomly pick K distinct samples as the initial means
    mu = X[rng.choice(n, K, replace=False), :]

    # Initialize the covariance matrices as identity matrices
    Sigma = [np.eye(d) for _ in range(K)]

    # Initialize the mixing coefficients uniformly
    pi = np.full(K, 1 / K)

    return mu, Sigma, pi

In [None]:
def e_step(X, mu, Sigma, pi):
    """E-step: compute responsibilities.

    Parameters
    ----------
    X : ndarray of shape (N, d)
        Data matrix, one sample per row.
    mu : sequence of K mean vectors of length d
    Sigma : sequence of K covariance matrices of shape (d, d)
    pi : sequence of K mixing coefficients

    Returns
    -------
    responsibilities : ndarray of shape (N, K)
        ``responsibilities[i, k]`` is the posterior probability that sample
        ``i`` was generated by component ``k``; each row sums to 1.
    """
    N, K = X.shape[0], len(pi)
    log_resp = np.empty((N, K))

    # A zero mixing weight legitimately maps to -inf; silence the warning.
    with np.errstate(divide="ignore"):
        log_pi = np.log(np.asarray(pi, dtype=float))

    # Work in log space: for points far from every component the plain pdf
    # underflows to 0 for all k and the normalization would become 0 / 0.
    for k in range(K):
        log_resp[:, k] = log_pi[k] + multivariate_normal.logpdf(
            X, mean=mu[k], cov=Sigma[k]
        )

    # Shift each row by its maximum so the largest term is exp(0) = 1,
    # which keeps the row sum strictly positive after exponentiation.
    log_resp -= log_resp.max(axis=1, keepdims=True)
    responsibilities = np.exp(log_resp)

    # Normalize across components to get the probabilities
    responsibilities /= responsibilities.sum(axis=1, keepdims=True)

    return responsibilities

In [None]:
def m_step(X, responsibilities, reg_covar=1e-6):
    """M-step: update parameters.

    Parameters
    ----------
    X : ndarray of shape (N, d)
        Data matrix, one sample per row.
    responsibilities : array-like of shape (N, K)
        Posterior component probabilities for each sample.
    reg_covar : float, optional
        Non-negative value added to the diagonal of every covariance matrix
        so that a component collapsing onto a single point (or onto duplicate
        points) still has an invertible covariance. Pass 0.0 to obtain the
        unregularized maximum-likelihood estimate.

    Returns
    -------
    mu : ndarray of shape (K, d)
        Updated component means.
    Sigma : list of K ndarrays of shape (d, d)
        Updated component covariances.
    pi : ndarray of shape (K,)
        Updated mixing coefficients (``Nk / N``).
    """
    responsibilities = np.asarray(responsibilities, dtype=float)
    N, d = X.shape
    K = responsibilities.shape[1]

    # Effective number of data points assigned to each component
    Nk = responsibilities.sum(axis=0)

    # Floor the divisor so an empty component yields finite parameters
    # instead of 0 / 0 = NaN; components with real mass are unaffected.
    Nk_safe = np.maximum(Nk, np.finfo(float).eps)

    # Update means
    mu = np.dot(responsibilities.T, X) / Nk_safe[:, np.newaxis]

    # Update covariances
    ridge = reg_covar * np.eye(d)
    Sigma = []
    for k in range(K):
        diff = X - mu[k]
        # Responsibility-weighted scatter matrix around the new mean
        Sigma_k = np.dot(responsibilities[:, k] * diff.T, diff) / Nk_safe[k]
        Sigma.append(Sigma_k + ridge)

    # Update mixing coefficients (uses the raw counts so they still sum to 1)
    pi = Nk / N

    return mu, Sigma, pi

In [None]:
def gmm_em(X, K, max_iter=100):
    """Gaussian Mixture Model Estimation using Expectation-Maximization.

    Runs a fixed number of EM iterations (no convergence test is applied).

    Parameters
    ----------
    X : ndarray of shape (N, d)
        Data matrix, one sample per row.
    K : int
        Number of mixture components.
    max_iter : int, optional
        Number of E-step / M-step rounds to perform.

    Returns
    -------
    mu, Sigma, pi
        Final means, covariance matrices and mixing coefficients, as returned
        by the last M-step (or by the initialization when ``max_iter`` is 0).
    """
    # Initialize parameters (the initializer in this file is `init_params`;
    # the previously referenced `initialize_parameters` is not defined)
    mu, Sigma, pi = init_params(X, K)

    for _ in range(max_iter):
        # E-step
        responsibilities = e_step(X, mu, Sigma, pi)

        # M-step
        mu, Sigma, pi = m_step(X, responsibilities)

    return mu, Sigma, pi

# Note: In a real application, you'd want to add convergence criteria based on
# the change in log-likelihood between iterations, and you'd also want to
# handle singular covariance matrices.

In [None]:
# mu, Sigma, pi = gmm_em(X, K)  # Uncomment this line to run with actual data