In [1]:
import numpy as np
import pandas as pd

## Problem 1

In [2]:
T = np.array([[1/3,1/3,1/3,0,0,0],
              [1/3,1/3,0,1/3,0,0],
              [1/3,0,1/3,1/3,0,0],
              [0,.2,.2,.2,.2,.2],
              [0,0,0,.5,.5,0],
              [0,0,0,.5,0,.5]])

Oempty = np.array([.5, .5, .5, 1, .25, .25])
Ohash = np.array([0, .25, .25, 0, .5, .5])
Owall = np.array([.5, .25, .25, 0, .25, .25])

**Sub-Problem 1.1**

In [3]:
# Solve for the eigenvector of the Transition matrix transpose
e_values, e_vectors = np.linalg.eig(T.T)

# Retrieve the eigenvalue whose value is approx. 1 (python binary math)
selection = np.argmax(abs(e_values - 1) <= 1e-10)

# Extract the optimal policy and normalize to scale to 1
pi = e_vectors[:, selection] / e_vectors[:, selection].sum()
print("Stationary distribution π =", pi)

Stationary distribution π = [0.16666667 0.16666667 0.16666667 0.27777778 0.11111111 0.11111111]


**Sub-Problem 1.2**

In [4]:
# Starting from our optimal policy (a0) we follow sequential steps 1) Wall 2) Hash
action_map = {'wall': Owall, 'hash': Ohash, 'empty': Oempty}
alpha_values = {'a0': pi}
belief_distributions = []

a_prime = pi.copy()
for idx, a in enumerate(['wall', 'hash']):
    print(f'Observation {idx+1}:{a}')
    action = action_map[a]
    a_prime = a_prime.dot(T) * action
    alpha_values[f'a{idx+1}'] = a_prime
    print(f'   a{idx+1} = {a_prime}')

    # Normalize the element wise multiplication to sum to 1
    a_norm = a_prime / a_prime.sum()
    belief_distributions.append(a_norm)

Observation 1:wall
   a1 = [0.08333333 0.04166667 0.04166667 0.         0.02777778 0.02777778]
Observation 2:hash
   a2 = [0.         0.01041667 0.01041667 0.         0.00694444 0.00694444]


In [5]:
# Display the belief distributions
for i in range(2):
    print(f'e{i+1} = {belief_distributions[i]}')

e1 = [0.375  0.1875 0.1875 0.     0.125  0.125 ]
e2 = [0.  0.3 0.3 0.  0.2 0.2]



**Sub-Problem 1.3**

In [6]:
# Compute joint-probability distribution 
df = pd.DataFrame(
    {
        'Pr(X1 | e1))': belief_distributions[0],
        'Pr(X2 | e1, e2))': belief_distributions[1],
        'Pr(X1, X2 | e1, e2))': belief_distributions[0] * belief_distributions[1], 
        }
        )

# Consider only joint probability values that are nonzero.
display(df[df['Pr(X1, X2 | e1, e2))'] > 0])

Unnamed: 0,Pr(X1 | e1)),"Pr(X2 | e1, e2))","Pr(X1, X2 | e1, e2))"
1,0.1875,0.3,0.05625
2,0.1875,0.3,0.05625
4,0.125,0.2,0.025
5,0.125,0.2,0.025


**Sub-Problem 1.4**

In [7]:
beta = np.array([1, 1, 1, 1, 1, 1])
beta_values = {'b2': beta}

print(f'Observation {2}:initial')
print(f'   b{2} = {beta}')
k_term = 1
for idx, a in enumerate(['hash', 'wall']):
    print(f'Observation {k_term-idx}:{a}')
    action = action_map[a]
    beta = (beta * action).dot(T.T)
    print(f'   b{k_term-idx} = {beta}')
    beta_values[f'b{k_term-idx}'] = beta

Observation 2:initial
   b2 = [1 1 1 1 1 1]
Observation 1:hash
   b1 = [0.16666667 0.08333333 0.08333333 0.3        0.25       0.25      ]
Observation 0:wall
   b0 = [0.04166667 0.03472222 0.03472222 0.03333333 0.03125    0.03125   ]


**Sub-Problem 1.5**

In [8]:
# Multiply element-wise in reverse order for beta and alpha 
gamma_values = {}

n = len(alpha_values)
for i in range(n):
    X = beta_values[f'b{i}'] * alpha_values[f'a{i}']
    # Normalize the the gamma values to determine
    print(f'gamma{i}:{X/X.sum()}')
    gamma_values[f'g{i}'] = X/X.sum()

gamma0:[0.2        0.16666667 0.16666667 0.26666667 0.1        0.1       ]
gamma1:[0.4 0.1 0.1 0.  0.2 0.2]
gamma2:[0.  0.3 0.3 0.  0.2 0.2]


**Sub-Problem 1.6**

In [9]:
gamma_matrix = np.array([i for i in gamma_values.values()])

# gamma one corresponds with the wall
print(f"Pr(e1|X) = {gamma_values['g1'] / gamma_matrix[1:].sum(axis=0)}")

# gamma two corresponds with the hash
print(f"Pr(e2|X) = {gamma_values['g2'] / gamma_matrix[1:].sum(axis=0)}")

Pr(e1|X) = [1.   0.25 0.25  nan 0.5  0.5 ]
Pr(e2|X) = [0.   0.75 0.75  nan 0.5  0.5 ]


  print(f"Pr(e1|X) = {gamma_values['g1'] / gamma_matrix[1:].sum(axis=0)}")
  print(f"Pr(e2|X) = {gamma_values['g2'] / gamma_matrix[1:].sum(axis=0)}")


**Sub-Problem 1.7**

In [10]:
# Compute the “expected” transition frequency matrices
zeta0 = np.diag(alpha_values['a0'])@T@np.diag(Owall)@np.diag(beta_values['b1'])
zeta1 = np.diag(alpha_values['a1'])@T@np.diag(Ohash)@np.diag(beta_values['b2'])

# Construct the updated model transition matrix
updated_transition = (zeta0 + zeta1).copy()

for idx, row in enumerate(updated_transition):
    updated_transition[idx, :] = row / row.sum()

print(updated_transition)

[[0.22222222 0.38888889 0.38888889 0.         0.         0.        ]
 [0.5        0.5        0.         0.         0.         0.        ]
 [0.5        0.         0.5        0.         0.         0.        ]
 [0.         0.125      0.125      0.         0.375      0.375     ]
 [0.         0.         0.         0.         1.         0.        ]
 [0.         0.         0.         0.         0.         1.        ]]


## Problem 2

In [55]:
prob_wGb = pd.DataFrame({'Bronchitis': ['T', 'F'], 'Proba': [0.6, 0.001]})
prob_bGis = pd.DataFrame({'Influenza': ['T', 'T', 'F', 'F', 'T', 'T', 'F', 'F'], 
                          'Smokes': ['T', 'F', 'T', 'F', 'T', 'F', 'T', 'F'], 
                          'Bronchitis': ['T', 'T', 'T', 'T', 'F', 'F', 'F', 'F'], 
                          'Proba': [0.99, 0.9, 0.7, 1e-4, 0.01, 0.1, 0.3, 0.9999]})
prob_i = pd.DataFrame({'Influenza': ['T', 'F'], 'Proba': [0.05, 0.95]})
prob_s = pd.DataFrame({'Smokes': ['T', 'F'], 'Proba': [0.2, 0.8]})
prob_fGi = pd.DataFrame({'Influenza': ['T', 'T', 'F', 'F'], 'Fever': ['T', 'F', 'T', 'F'], 'Proba': [0.9, 0.1, 0.05, 0.95]})

In [56]:
# First intermediate product given merge 
X1 = pd.merge(left=prob_bGis, right=prob_wGb, on='Bronchitis')
X1['Proba'] = X1['Proba_x'] * X1['Proba_y']
X1 = X1.groupby(['Influenza', 'Smokes'])['Proba'].sum().reset_index()

In [57]:
X1

Unnamed: 0,Influenza,Smokes,Proba
0,F,F,0.00106
1,F,T,0.4203
2,T,F,0.5401
3,T,T,0.59401


In [58]:
# Second intermediate product given merge 
X2 = pd.merge(left=X1, right=prob_s, on='Smokes')
X2['Proba'] = X2['Proba_x'] * X2['Proba_y']
X2 = X2.groupby(['Influenza'])['Proba'].sum().reset_index()

In [59]:
X2

Unnamed: 0,Influenza,Proba
0,F,0.084908
1,T,0.550882


In [60]:
# Third intermediate product given merge 
X3 = pd.merge(left=X2, right=prob_i, on='Influenza')
X3 ['Proba'] = X3 ['Proba_x'] * X3 ['Proba_y']
X3 = X3.groupby(['Influenza'])['Proba'].sum().reset_index()

In [61]:
X3

Unnamed: 0,Influenza,Proba
0,F,0.080663
1,T,0.027544


In [62]:
# Final intermediate product given merge 
X4 = pd.merge(left=X3, right=prob_fGi, on='Influenza')
X4 ['Proba'] = X4 ['Proba_x'] * X4 ['Proba_y']
X4 = X4.groupby(['Fever'])['Proba'].sum().reset_index()
X4 = X4.set_index('Fever')

In [63]:
X4 / X4.sum()

Unnamed: 0_level_0,Proba
Fever,Unnamed: 1_level_1
F,0.733632
T,0.266368
