# Learning

In [12]:
from hmm.hmm import HMM
from hmm.learning import hard_assignment_em, learn_parameters_everything_observed
from hmm.types import IntArray

import numpy as np

In [13]:
gamma = 0.5
beta = 0.8
alpha = 0.6
rates = [1, 20]

# This is uppercase-gamma.
transition_matrix = np.array(
    [[1 - gamma, 0, gamma], [0, 1 - gamma, gamma], [beta / 2, beta / 2, 1 - beta]]
)

In [14]:
hmm = HMM(transition_matrix, alpha, processing_modes=[0, 1, 2], rates=rates)

### Simulated data

In [15]:
num_nodes = 8
time_steps = 1000
initial_c = 2

In [16]:
observed_processing_modes, observed_focus, observed_stimuli = hmm.forward(
    num_nodes,
    time_steps,
    initial_c,
)

### Learning with everything observed

In [17]:
# This is necessary for mask computation.
observed_processing_modes: IntArray = np.array(observed_processing_modes)

In [18]:
(
    lambda_0_hat,
    lambda_1_hat,
    learned_alpha,
    learned_beta,
    learned_gamma
) = learn_parameters_everything_observed(
    observed_processing_modes,
    observed_focus,
    observed_stimuli
)

[0 0 1 ... 0 1 1]
1509 2536


In [19]:
observed_focus.shape

(1000, 8)

Learned parameters ...

In [20]:
learned_rates = [lambda_0_hat, lambda_1_hat]
learned_transition_matrix = np.array(
    [[1 - learned_gamma, 0, learned_gamma],
     [0, 1 - learned_gamma, learned_gamma],
     [learned_beta / 2, learned_beta / 2, 1 - learned_beta]]
)

In [21]:
learned_hmm = HMM(
    transition=learned_transition_matrix,
    alpha=learned_alpha,
    processing_modes=hmm.processing_modes,
    rates=learned_rates
)

In [22]:
learned_hmm = hard_assignment_em(observed_stimuli, observed_focus, HMM(transition_matrix, alpha, processing_modes=[0, 1, 2], rates=rates))

[0 0 1 ... 0 1 1]
1851 2736
[1 0 0 1 1 1 1 1 1 1 1 0 1 0 1 0 0 1 1 1 0 1 1 0 1 1 1 0 0 0 1 0 0 0 1 0 1
 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1 1 1 0 1 0 1 0
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 0 1 1 1 1
 1 1 1 1 1 1 0 0 1 0 1 0 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 0 1 0 1 0 0 0 1 1
 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1
 1 1 1 1 1 0 0 1 0 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1
 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 0 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 0 1
 0 1 1 0 1 0 1 1 1 0 0 0 1 1 1 0 1 1 1 0 1 0 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1
 0 1 1 0 1 1 0 1 0 1 1 0 1 1 0 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 0 1 1 1 1 0 1
 1 1 1 0 0 0 1 1 1 0 1 0 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0
 1 1 1 1 0 1 1 0 1 1 0 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 1 1 1 0
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 1 0 1 1 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1
 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 1 1 1 0

In [23]:
print(f"True alpha: {hmm.alpha}, Learned alpha: {learned_hmm.alpha}")
print(f"True rates: {hmm.rates}, Learned rates: {learned_hmm.rates}")
print(f"True transition matrix:\n{hmm.transition}")
print(f"Learned transition matrix:\n{learned_hmm.transition}")

True alpha: 0.6, Learned alpha: 0.6022727272727273
True rates: [1, 20], Learned rates: [0.9855951478392722, 19.971555775414295]
True transition matrix:
[[0.5 0.  0.5]
 [0.  0.5 0.5]
 [0.4 0.4 0.2]]
Learned transition matrix:
[[0.996997 0.       0.003003]
 [0.       0.996997 0.003003]
 [0.001001 0.001001 0.997998]]


### Testing the learned model (everything observed)

In [24]:
true_processing_modes, true_focus, observations = hmm.forward(
    num_nodes,
    time_steps,
    initial_c,
)

In [25]:
original_marginals_c, original_marginals_z = hmm.nielslief_propagation(observations)
learned_marginals_c, learned_marginals_z = learned_hmm.nielslief_propagation(observations)

In [26]:
original_marginals_c, original_marginals_z = hmm.nielslief_propagation(observations)
learned_marginals_c, learned_marginals_z = learned_hmm.nielslief_propagation(observations)

In [27]:
def check_correctness(marginals_c, marginals_z, hmm_to_use) -> None:
    estimated_C = np.argmax(marginals_c, axis=1)
    # Compute the most likely Z given the estimated C
    estimated_Z = np.zeros((time_steps, num_nodes), dtype=int)

    for t, c in enumerate(estimated_C):
        estimated_Z[t] = hmm_to_use.sample_hidden_z(num_nodes, c)

    correct_C = np.sum(np.equal(estimated_C, true_processing_modes)) / (time_steps - 1)
    correct_Z = np.sum(true_focus == estimated_Z) / (time_steps * num_nodes)

    print(f"Proportion of correct C estimations: {correct_C:.2f}")
    print(f"Proportion of correct Z estimations: {correct_Z:.2f}")

In [28]:
check_correctness(original_marginals_c, original_marginals_z, hmm)
check_correctness(learned_marginals_c, learned_marginals_z, learned_hmm)

Proportion of correct C estimations: 0.49
Proportion of correct Z estimations: 0.53
Proportion of correct C estimations: 0.38
Proportion of correct Z estimations: 0.49


In [29]:
training_data = np.genfromtxt("../../data/Ex_1.csv", delimiter="," ,dtype=int)[1:, 1:]

original_marginals_c, original_marginals_z = hmm.nielslief_propagation(training_data)

check_correctness(original_marginals_c, original_marginals_z, learned_hmm)

ValueError: operands could not be broadcast together with shapes (100,) (1000,) 

## Learning just from $\textbf{X}$ (full learning)

Compute $\hat{Z}_{t,i} = \argmax_z P(Z_{t,i} = z | \textbf{X} = \textbf{x})$ and $\hat{C}_t = \argmax_z P(C_t = z | \textbf{X} = \textbf{x})$

In [None]:
# Whatever. We're just using some joint-prob, taking from above. :)
z_hat, c_hat = hmm.nielslief_propagation(training_data)

In [None]:
c_hat

array([[[           nan,            nan],
        [           nan,            nan],
        [           nan,            nan],
        ...,
        [           nan,            nan],
        [           nan,            nan],
        [           nan,            nan]],

       [[9.99761761e-01, 2.38238645e-04],
        [9.99546836e-01, 4.53163638e-04],
        [6.96979083e-01, 3.03020917e-01],
        ...,
        [6.96979076e-01, 3.03020924e-01],
        [9.97465135e-01, 2.53486468e-03],
        [9.99546836e-01, 4.53163638e-04]],

       [[9.47340821e-01, 5.26591788e-02],
        [9.86833363e-01, 1.31666373e-02],
        [9.47340821e-01, 5.26591788e-02],
        ...,
        [9.86833363e-01, 1.31666373e-02],
        [9.99863423e-01, 1.36577216e-04],
        [8.42023569e-01, 1.57976431e-01]],

       ...,

       [[9.99531414e-01, 4.68585705e-04],
        [9.86651045e-01, 1.33489553e-02],
        [9.99822914e-01, 1.77085656e-04],
        ...,
        [9.86651045e-01, 1.33489553e-02],
     

Learning ...

In [None]:
epochs: int = 10 # lol.

In [None]:
hmm = HMM(transition_matrix, alpha, processing_modes=[0, 1, 2], rates=rates)

In [None]:
for _ in range(epochs):
    joint_prob = hmm.infer(observations)
    z_hat, c_hat = expectation_maximisation_hard_assignment(joint_prob, num_nodes=num_nodes)

    (
        lambda_0_hat,
        lambda_1_hat,
        learned_alpha,
        learned_beta,
        learned_gamma
    ) = learn_parameters_everything_observed(
        c_hat,
        z_hat,
        observations[:-1]
    )

    learned_rates = [lambda_0_hat, lambda_1_hat]
    learned_transition_matrix = np.array(
        [[1 - learned_gamma, 0, learned_gamma],
        [0, 1 - learned_gamma, learned_gamma],
        [learned_beta / 2, learned_beta / 2, 1 - learned_beta]]
    )

    hmm = HMM(learned_transition_matrix, alpha=learned_alpha, processing_modes=hmm.states, rates=learned_rates)


AttributeError: 'HMM' object has no attribute 'infer'

In [None]:
learned_joint_prob = hmm.infer(observations)

: 

In [None]:
marginal_prob_C = np.sum(learned_joint_prob, axis=2)

estimated_C = np.argmax(marginal_prob_C, axis=1)
estimated_Z = np.zeros((time_steps, num_nodes), dtype=int)

for t, c in enumerate(estimated_C):
    estimated_Z[t] = hmm.sample_hidden_z(num_nodes, c)

correct_C = np.sum(np.equal(estimated_C, true_processing_modes[:-1])) / (time_steps - 1)
correct_Z = np.sum(estimated_Z == true_focus) / ((time_steps - 1) * num_nodes)

print(f"Proportion of correct C estimations: {correct_C:.2f}")
print(f"Proportion of correct Z estimations: {correct_Z:.2f}")

: 

: 