In [1]:
import scipy.io
import numpy as np
from scipy.stats import multivariate_normal

In [2]:
# Read the MATLAB file once, then pull out the four arrays it provides.
mat_data = scipy.io.loadmat('HW8.mat')

# Feature matrices are used exactly as stored in the file.
train_x, test_x = mat_data['train_x'], mat_data['test_x']

# Label arrays are collapsed to 1-D so they can be used as boolean masks
# over the samples and compared element-wise with the predictions.
train_y, test_y = (mat_data[key].flatten() for key in ('train_y', 'test_y'))

In [3]:
# Class priors: the fraction of training labels equal to 1 and to 2.
# The mean of a boolean mask is the proportion of True entries.
is_w1 = train_y == 1
is_w2 = train_y == 2
P_w1 = is_w1.mean()
P_w2 = is_w2.mean()

In [4]:
# Compute ML estimates of mean and covariance for each class
# Select each class's training samples once and reuse them below.
train_x_w1 = train_x[train_y == 1]
train_x_w2 = train_x[train_y == 2]

# The ML estimate of a Gaussian mean is the per-feature sample mean
# (axis=0 averages over the samples, i.e. over rows).
mu_w1 = np.mean(train_x_w1, axis=0)
mu_w2 = np.mean(train_x_w2, axis=0)

# np.cov normalizes by N - 1 by default (the unbiased estimator).
# bias=True switches to the 1/N normalization, which is the actual
# maximum-likelihood covariance estimate. rowvar=False tells np.cov that
# each row is an observation and each column a variable.
sigma_w1 = np.cov(train_x_w1, rowvar=False, bias=True)
sigma_w2 = np.cov(train_x_w2, rowvar=False, bias=True)

In [5]:
# Freeze one multivariate normal per class using the parameters estimated
# above; the frozen objects expose the class-conditional density p(x | w_i).
pdf_w1 = multivariate_normal(mu_w1, sigma_w1)
pdf_w2 = multivariate_normal(mu_w2, sigma_w2)

In [6]:
# Classify test points with the Bayes decision rule, evaluated in the log domain.
# Comparing log P(w_i) + log p(x | w_i) selects the same class as comparing
# P(w_i) * p(x | w_i), but it does not break when both densities underflow
# to 0.0 (points far from both means, or high-dimensional data). In that case
# the product form compares 0 > 0 and silently labels every such point as 2.
with np.errstate(divide='ignore'):
    # A zero prior yields log(0) = -inf, which still compares correctly;
    # errstate only silences the harmless divide-by-zero warning.
    log_post_w1 = np.log(P_w1) + pdf_w1.logpdf(test_x)
    log_post_w2 = np.log(P_w2) + pdf_w2.logpdf(test_x)

# Ties go to class 2, matching the strict '>' of the decision rule.
Btest_y = np.where(log_post_w1 > log_post_w2, 1, 2)

In [7]:
# Classification error: the fraction of test points whose predicted label
# differs from the true label (mean of the boolean mismatch mask).
misclassified = Btest_y != test_y
error_rate = np.mean(misclassified)

In [8]:
# Report the estimated priors and the test error, each to four decimals.
priors_line = f"Estimated class priors: P(w1) = {P_w1:.4f}, P(w2) = {P_w2:.4f}"
error_line = f"Classification error: {error_rate:.4f}"
print(priors_line)
print(error_line)

Estimated class priors: P(w1) = 0.5000, P(w2) = 0.5000
Classification error: 0.1500


# Explanation of Methodology

1. **Estimate Class Priors**: We computed the prior probabilities of each class based on the training labels.
2. **Maximum Likelihood Estimates (MLE)**: We estimated the mean and covariance for each class using MLE.
3. **Define Gaussian PDFs**: Using the estimated parameters, we modeled each class with a multivariate normal distribution.
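4. **Bayesian Classification Rule**: Each test point $x$ was assigned to the class with the larger posterior probability. Because the evidence $p(x)$ is common to both classes, this amounts to choosing $\omega_1$ when $P(\omega_1)\,p(x \mid \omega_1) > P(\omega_2)\,p(x \mid \omega_2)$ and $\omega_2$ otherwise.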
5. **Error Rate Computation**: We calculated the classification error as the fraction of test points whose predicted label differs from the true test label.

The final results include the estimated priors and the classification error rate.