## IMPORTS

In [1]:
import numpy as np

## QUESTION ONE

Consider a simple Markov Chain structure X → Y → Z, where all variables are binary. You are required to:
(a) Write code (in your preferred programming language) that generates a joint distribution (not necessarily a valid BN one) over
the three variables X, Y and Z.


In [6]:
def generate_random_distribution(shape=(2, 2, 2), rng=None):
    """
    Generate a random joint probability table, by default P(X, Y, Z) for binary variables.

    Every entry is drawn uniformly from [0, 1) and the table is then divided by its
    total, so the result is non-negative and sums to 1. Nothing forces the table to
    factorize as a Bayesian network; it is an arbitrary joint distribution.

    Parameters
    ----------
    shape : tuple of int, optional
        Number of states of each variable, one axis per variable. The default
        (2, 2, 2) gives three binary variables with X on axis 0, Y on axis 1 and
        Z on axis 2, i.e. the eight entries 000, 001, 010, 011, 100, 101, 110, 111.
    rng : numpy.random.Generator, optional
        Source of randomness. Pass e.g. ``np.random.default_rng(seed)`` to get a
        reproducible sample. When omitted, NumPy's global random state is used.

    Returns
    -------
    numpy.ndarray
        Array of the requested shape whose entries sum to 1.
    """
    if rng is None:
        # Global legacy stream: same call as before, and it still honours np.random.seed().
        weights = np.random.rand(*shape)
    else:
        weights = rng.random(shape)

    # Dividing by the total turns the unnormalized weights into probabilities.
    return weights / weights.sum()

# Draw one sample joint distribution and show the whole table.
random = generate_random_distribution()
print(random)
# The printed array is indexed [x][y][z]; the numbers differ on every run, but the
# layout is always:
# [[[P(X=0,Y=0,Z=0) P(X=0,Y=0,Z=1)]
#   [P(X=0,Y=1,Z=0) P(X=0,Y=1,Z=1)]]
#
#  [[P(X=1,Y=0,Z=0) P(X=1,Y=0,Z=1)]
#   [P(X=1,Y=1,Z=0) P(X=1,Y=1,Z=1)]]]

# Sanity check: the eight probabilities should add up to 1.0.
print(random.sum())

[[[0.16099913 0.04830522]
  [0.15476067 0.06836024]]

 [[0.17018954 0.11465618]
  [0.14629027 0.13643875]]]
1.0


(b) Write code that verifies whether a distribution is a valid BN distribution, i.e. whether it factorizes as P(X, Y, Z) = P(X) P(Y|X) P(Z|Y).


In [8]:
def _safe_divide(numerator, denominator):
    """Element-wise numerator / denominator, returning 0 wherever the denominator is 0."""
    denominator = np.broadcast_to(denominator, numerator.shape)
    return np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator, dtype=float),
        where=denominator > 0,
    )


def check_bn_validity(P):
    """
    Check whether the joint table P(X, Y, Z) factorizes as P(X) P(Y|X) P(Z|Y).

    Axis convention: X is axis 0, Y is axis 1, Z is axis 2. The factors are read off
    the joint by marginalizing (summing over the unwanted axes):

        P(X)   = sum over Y and Z
        P(Y|X) = P(X, Y) / P(X),  with P(X, Y) = sum over Z
        P(Z|Y) = P(Y, Z) / P(Y),  with P(Y, Z) = sum over X and P(Y) = sum over X and Z

    The product of the three factors is then compared with the original table.

    Parameters
    ----------
    P : array_like
        Three-dimensional joint probability table indexed as P[x, y, z]. The
        variables may have any number of states, not only two.

    Returns
    -------
    bool
        True if P matches its chain reconstruction within the np.allclose
        tolerance (atol=1e-5 plus the default relative tolerance), False otherwise.
    """
    P = np.asarray(P, dtype=float)

    P_X = P.sum(axis=(1, 2))   # P(X),    shape (|X|,)
    P_XY = P.sum(axis=2)       # P(X, Y), shape (|X|, |Y|)
    P_Y = P.sum(axis=(0, 2))   # P(Y),    shape (|Y|,)
    P_YZ = P.sum(axis=0)       # P(Y, Z), shape (|Y|, |Z|)

    # A zero marginal means every joint entry beneath it is zero as well, so the
    # conditional is irrelevant there; _safe_divide uses 0 instead of producing NaN.
    P_Y_given_X = _safe_divide(P_XY, P_X[:, None])   # indexed [x, y]
    # P(Z|Y) must not depend on x: it is a 2-D table indexed [y, z].
    P_Z_given_Y = _safe_divide(P_YZ, P_Y[:, None])   # indexed [y, z]

    # Rebuild P(x, y, z) = P(x) * P(y|x) * P(z|y) for all entries at once by
    # broadcasting each factor along the axes it does not depend on.
    P_reconstructed = (
        P_X[:, None, None]
        * P_Y_given_X[:, :, None]
        * P_Z_given_Y[None, :, :]
    )

    return bool(np.allclose(P, P_reconstructed, atol=1e-5))

# Test the single sample drawn above; a random joint almost never factorizes as a chain.
is_valid_bn = check_bn_validity(random)
print(f"Is the random distribution a valid BN factorization? {is_valid_bn}")

Is the random distribution a valid BN factorization? False


(c) Using your code, generate 10000 distributions and compute the fraction of distributions that are valid BN distributions.

In [14]:
def estimate_valid_bn_fraction(num_samples=10000):
    """
    Estimate by sampling how often a random joint distribution is a valid BN distribution.

    Draws ``num_samples`` tables with ``generate_random_distribution`` and returns
    the share of them that ``check_bn_validity`` accepts.
    """
    # Count one hit per sampled table that passes the factorization check.
    hits = sum(
        1
        for _ in range(num_samples)
        if check_bn_validity(generate_random_distribution())
    )
    return hits / num_samples

# Monte Carlo estimate over 10000 independently sampled joint distributions.
valid_fraction = estimate_valid_bn_fraction(num_samples=10000)
print(f"Fraction of valid BN distributions: {valid_fraction:.4f}")

Fraction of valid BN distributions: 0.0000
