In [1]:
# Implement the main EM algorithm for the mixture of Gaussians model
    # Input: data file, number of clusters, number of iterations
    # Output: the final parameters of the model

In [2]:
# Expectation-Maximization algorithm
import numpy as np
import matplotlib.pyplot as plt
import sys
import math

In [3]:
def main(argv=None):
    """Fit a Gaussian mixture model with EM and print the fitted parameters.

    Args:
        argv: Optional argument list laid out like ``sys.argv``, i.e.
            ``[program, data_file, num_clusters, num_iterations]``. When
            omitted, ``sys.argv`` is used. Passing it explicitly lets the
            function be called from a notebook, where ``sys.argv`` holds the
            kernel's own arguments, e.g.
            ``main(["em.py", "2gaussian.txt", "2", "100"])``.

    Returns:
        A ``(weights, means, covariances)`` tuple with shapes ``(K,)``,
        ``(K, d)`` and ``(K, d, d)`` for ``K`` clusters and ``d`` features.

    Raises:
        SystemExit: With code 1 (after printing the usage line) when the
            argument list does not have exactly four entries.
        ValueError: If an argument is not an integer, ``num_clusters`` is
            less than 1, or the data file holds no data points.
    """
    # Read the input arguments
    if argv is None:
        argv = sys.argv
    if len(argv) != 4:
        print("Usage: python3 em.py <data_file> <num_clusters> <num_iterations>")
        sys.exit(1)

    data_file = argv[1]
    num_clusters = int(argv[2])
    num_iterations = int(argv[3])
    if num_clusters < 1:
        raise ValueError("num_clusters must be at least 1")

    # Load the data; ndmin=2 keeps a single-column file two-dimensional so
    # the shape can always be unpacked into (points, features).
    data = np.loadtxt(data_file, ndmin=2)
    num_data_points, num_features = data.shape
    if num_data_points == 0:
        raise ValueError("data file contains no data points")

    # Initialize the parameters.
    # The starting means are randomly chosen data points: values drawn from
    # [0, 1) ignore where the data actually lives, and for data far from the
    # origin every density underflows to zero on the first E-step.
    weights = np.full(num_clusters, 1.0 / num_clusters)
    seed_rows = np.random.choice(
        num_data_points,
        size=num_clusters,
        replace=num_clusters > num_data_points,
    )
    means = data[seed_rows]
    covariances = np.tile(np.eye(num_features), (num_clusters, 1, 1))

    # Small diagonal ridge added to every covariance update so a component
    # that collapses onto (near-)identical points stays invertible.
    ridge = 1e-6 * np.eye(num_features)

    # Run the EM algorithm
    for _ in range(num_iterations):
        # E-step: calculate the responsibilities
        responsibilities = np.zeros((num_data_points, num_clusters))
        for k in range(num_clusters):
            responsibilities[:, k] = weights[k] * multivariate_gaussian(data, means[k], covariances[k])
        row_totals = responsibilities.sum(axis=1, keepdims=True)
        # A point whose density underflowed under every component would give
        # 0/0 = NaN; share such points equally between the components instead.
        underflowed = ~(row_totals[:, 0] > 0)
        if underflowed.any():
            responsibilities[underflowed] = 1.0 / num_clusters
            row_totals[underflowed] = 1.0
        responsibilities /= row_totals

        # M-step: update the parameters
        Nk = responsibilities.sum(axis=0)
        weights = Nk / num_data_points
        # Avoid dividing by zero for a component that received no mass.
        safe_Nk = np.maximum(Nk, np.finfo(float).tiny)
        means = (responsibilities.T @ data) / safe_Nk[:, np.newaxis]
        for k in range(num_clusters):
            diff = data - means[k]
            weighted_scatter = (responsibilities[:, k][:, np.newaxis] * diff).T @ diff
            covariances[k] = weighted_scatter / safe_Nk[k] + ridge

    # Print the final parameters
    print("Final weights:", weights)
    print("Final means:", means)
    print("Final covariances:", covariances)

    return weights, means, covariances

def multivariate_gaussian(x, mean, covariance):
    """Evaluate a multivariate normal density at each row of ``x``.

    The density is assembled in log space from ``np.linalg.slogdet`` and
    ``np.linalg.solve``. Taking the raw determinant instead underflows (or
    overflows) once the dimension is moderately large, which turns a finite
    density into ``inf``/``nan``; forming the explicit inverse is also less
    accurate than solving the linear system.

    Args:
        x: Array of shape ``(N, d)`` with one point per row. A 1-D array of
            length ``d`` is treated as a single point.
        mean: Array of shape ``(d,)``, the mean of the distribution.
        covariance: Array of shape ``(d, d)``, the covariance matrix.

    Returns:
        Array of shape ``(N,)`` holding the density at each point. The values
        are ``nan`` when the determinant of ``covariance`` is negative.

    Raises:
        numpy.linalg.LinAlgError: If ``covariance`` is singular.
    """
    x = np.atleast_2d(np.asarray(x, dtype=float))
    d = x.shape[1]
    diff = x - mean

    # Squared Mahalanobis distance diff_i^T * covariance^-1 * diff_i per row,
    # obtained by solving covariance @ z = diff^T rather than inverting.
    solved = np.linalg.solve(covariance, diff.T).T
    mahalanobis_sq = np.sum(solved * diff, axis=1)

    # log|covariance| without ever materialising the determinant itself.
    sign, log_abs_det = np.linalg.slogdet(covariance)
    # A negative determinant has no real square root; propagate nan for it.
    log_det = log_abs_det if sign > 0 else np.nan

    log_density = -0.5 * (d * np.log(2 * np.pi) + log_det + mahalanobis_sq)
    return np.exp(log_density)
# Script entry point. main() takes its inputs from sys.argv and, unless that
# list has exactly four entries, prints the usage line and exits with status 1.
# NOTE(review): when this cell is executed inside a notebook, __name__ is
# "__main__" but sys.argv holds the kernel's arguments, which is why the
# recorded output of this cell is the usage line followed by SystemExit: 1.
if __name__ == "__main__":
    main()



Usage: python3 em.py <data_file> <num_clusters> <num_iterations>


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


### A) The 2-dimensional Gaussian data in the file `2gaussian.txt` was generated from a mixture of two 2-dimensional Gaussians with the parameters below. Run the EM algorithm with random initial values to recover the parameters.
mean_1 = [3,3]; cov_1 = [[1,0],[0,3]]; n1=2000 points

mean_2 = [7,4]; cov_2 = [[1,0.5],[0.5,1]]; n2=4000 points

### B) Same problem for the 2-dimensional data in the file `3gaussian.txt`, generated from a mixture of three Gaussians. Verify your findings against the true parameters used to generate the data, listed below.
mean_1 = [3,3] ; cov_1 = [[1,0],[0,3]]; n1=2000

mean_2 = [7,4] ; cov_2 = [[1,0.5],[0.5,1]] ; n2=3000

mean_3 = [5,7] ; cov_3 = [[1,0.2],[0.2,1]] ; n3=5000
