<a href="https://colab.research.google.com/github/tkorsi/Machine-Learning-Seminars/blob/main/Average%20risk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import numpy as np
import scipy.stats as ss

def average_risk(p1, p2, mu1, cov1, mu2, cov2, n_samples=10000, random_state=1337):
    """
    Estimate the average risk (misclassification probability) of the
    optimal Bayesian classifier for a 2-class Gaussian problem via Monte Carlo.

    Labels are drawn from the priors, one point is drawn from the matching
    Gaussian for every label, each point is classified with the Bayes rule
    and the fraction of wrong predictions is returned.

    Parameters
    ----------
    p1, p2 : float
        Class prior probabilities (p1 + p2 == 1).
    mu1, mu2 : array_like, shape (d,)
        Means of the two Gaussian distributions. Both must have the same
        length ``d``; the original 2D use case corresponds to ``d == 2``.
    cov1, cov2 : array_like, shape (d, d)
        Covariance matrices of the two Gaussian distributions.
    n_samples : int, optional
        Number of samples to generate (default 10,000).
    random_state : int or np.random.Generator, optional
        Seed or random generator for reproducibility.

    Returns
    -------
    risk : float
        Monte Carlo estimate of the average 0-1 loss (misclassification probability).

    Raises
    ------
    ValueError
        If ``mu1`` and ``mu2`` are not one-dimensional vectors of equal length.
    """
    rng = np.random.default_rng(random_state)

    # Infer the dimensionality from the means instead of assuming 2D.
    mu1 = np.atleast_1d(np.asarray(mu1, dtype=float))
    mu2 = np.atleast_1d(np.asarray(mu2, dtype=float))
    if mu1.ndim != 1 or mu1.shape != mu2.shape:
        raise ValueError(
            "mu1 and mu2 must be 1-D vectors of the same length, got shapes "
            f"{mu1.shape} and {mu2.shape}"
        )
    dim = mu1.shape[0]

    # 1) Randomly pick class labels for each sample
    classes = rng.choice([1, 2], size=n_samples, p=[p1, p2])

    # Create distribution objects for each class
    mvn1 = ss.multivariate_normal(mean=mu1, cov=cov1)
    mvn2 = ss.multivariate_normal(mean=mu2, cov=cov2)

    # 2) Draw each point from the distribution of its class. Class 1 is
    #    sampled before class 2 so the random stream is consumed in a fixed
    #    order. `rvs` squeezes its output (a single draw or a 1-D problem
    #    loses an axis), so the result is reshaped back to (count, dim).
    mask1 = (classes == 1)
    mask2 = (classes == 2)
    n1 = int(np.count_nonzero(mask1))
    n2 = int(np.count_nonzero(mask2))
    X = np.empty((n_samples, dim))
    X[mask1] = np.reshape(mvn1.rvs(size=n1, random_state=rng), (n1, dim))
    X[mask2] = np.reshape(mvn2.rvs(size=n2, random_state=rng), (n2, dim))

    # 3) Bayesian decision rule: predict 1 if p1*f1(x) >= p2*f2(x), else 2.
    #    The comparison is done on logarithms: raw densities can underflow to
    #    zero (e.g. in high dimension), which would turn every comparison
    #    into 0 >= 0 and silently assign all points to class 1.
    with np.errstate(divide="ignore"):  # a zero prior maps to -inf on purpose
        log_prior1 = np.log(p1)
        log_prior2 = np.log(p2)
    score1 = log_prior1 + mvn1.logpdf(X)
    score2 = log_prior2 + mvn2.logpdf(X)
    pred = np.where(score1 >= score2, 1, 2)

    # 4) Misclassification rate (0-1 loss)
    risk = np.mean(pred != classes)
    return risk


In [21]:
# Example usage: two correlated 2-D Gaussians with unequal priors.
# The named parameters below are exactly the values passed to average_risk,
# so the printed estimate corresponds to what is written here.
p1 = 0.3
p2 = 0.7
mu1 = np.array([0, 0])
mu2 = np.array([2.5, 0])
cov1 = np.array([[1, 0.5], [0.5, 1]])
cov2 = np.array([[1, -0.5], [-0.5, 1]])

# Default n_samples and random_state are used.
risk_estimate = average_risk(p1, p2, mu1, cov1, mu2, cov2)
print("Estimated average risk =", risk_estimate)


Estimated average risk = 0.0873
