# Learning

In [47]:
from hmm.hmm import HMM
from hmm.learning import hard_assignment_em, learn_parameters_everything_observed
from hmm.types import IntArray

import numpy as np

In [48]:
gamma = 0.1
beta = 0.2
alpha = 0.9
rates = [1, 5]

# This is uppercase-gamma.
transition_matrix = np.array(
    [[1 - gamma, 0, gamma], [0, 1 - gamma, gamma], [beta / 2, beta / 2, 1 - beta]]
)

In [49]:
hmm = HMM(transition_matrix, alpha, processing_modes=[0, 1, 2], rates=rates)

### Simulated data

In [50]:
num_nodes = 8
time_steps = 1000
initial_c = 2

In [51]:
observed_processing_modes, observed_focus, observed_stimuli = hmm.forward(
    num_nodes,
    time_steps,
    initial_c,
)

### Learning with everything observed

In [52]:
# This is necessary for mask computation.
observed_processing_modes: IntArray = np.array(observed_processing_modes)

In [53]:
(
    lambda_0_hat,
    lambda_1_hat,
    learned_alpha,
    learned_beta,
    learned_gamma
) = learn_parameters_everything_observed(
    observed_processing_modes,
    observed_focus,
    observed_stimuli
)

In [54]:
observed_focus.shape

(1000, 8)

Learned parameters ...

In [55]:
learned_rates = [lambda_0_hat, lambda_1_hat]
learned_transition_matrix = np.array(
    [[1 - learned_gamma, 0, learned_gamma],
     [0, 1 - learned_gamma, learned_gamma],
     [learned_beta / 2, learned_beta / 2, 1 - learned_beta]]
)

In [56]:
learned_hmm = HMM(
    transition=learned_transition_matrix,
    alpha=learned_alpha,
    processing_modes=hmm.processing_modes,
    rates=learned_rates
)

In [57]:
learned_hmm = hard_assignment_em(observed_stimuli, observed_focus, HMM(transition_matrix, alpha, processing_modes=[0, 1, 2], rates=rates))

Found good after 4 iterations!


In [58]:
print(f"True alpha: {hmm.alpha}, Learned alpha: {learned_hmm.alpha}")
print(f"True rates: {hmm.rates}, Learned rates: {learned_hmm.rates}")
print(f"True beta: {beta}, Learned beta: {learned_beta}")
print(f"True gamma: {gamma}, Learned gamma: {learned_gamma}")
print(f"True transition matrix:\n{hmm.transition}")
print(f"Learned transition matrix:\n{learned_hmm.transition}")

True alpha: 0.9, Learned alpha: 0.8761961722488039
True rates: [1, 5], Learned rates: [0.991123439667129, 4.953356086461889]
True beta: 0.7, Learned beta: 0.6899441340782123
True gamma: 0.4, Learned gamma: 0.3837753510140406
True transition matrix:
[[0.6  0.   0.4 ]
 [0.   0.6  0.4 ]
 [0.35 0.35 0.3 ]]
Learned transition matrix:
[[0.68292683 0.         0.31707317]
 [0.         0.68292683 0.31707317]
 [0.37205387 0.37205387 0.25589226]]


### Testing the learned model (everything observed)

In [59]:
true_processing_modes, true_focus, observations = hmm.forward(
    num_nodes,
    time_steps,
    initial_c,
)

In [60]:
original_marginals_c, original_marginals_z = hmm.nielslief_propagation(observations)
learned_marginals_c, learned_marginals_z = learned_hmm.nielslief_propagation(observations)

In [61]:
original_marginals_c, original_marginals_z = hmm.nielslief_propagation(observations)
learned_marginals_c, learned_marginals_z = learned_hmm.nielslief_propagation(observations)

In [62]:
def check_correctness(marginals_c, marginals_z, hmm_to_use) -> None:
    estimated_C = np.argmax(marginals_c, axis=1)
    # Compute the most likely Z given the estimated C
    estimated_Z = np.zeros((time_steps, num_nodes), dtype=int)

    for t, c in enumerate(estimated_C):
        estimated_Z[t] = hmm_to_use.sample_hidden_z(num_nodes, c)

    correct_C = np.sum(np.equal(estimated_C, true_processing_modes)) / (time_steps - 1)
    correct_Z = np.sum(true_focus == estimated_Z) / (time_steps * num_nodes)

    print(f"Proportion of correct C estimations: {correct_C:.2f}")
    print(f"Proportion of correct Z estimations: {correct_Z:.2f}")

In [63]:
check_correctness(original_marginals_c, original_marginals_z, hmm)
check_correctness(learned_marginals_c, learned_marginals_z, learned_hmm)

Proportion of correct C estimations: 0.86
Proportion of correct Z estimations: 0.70
Proportion of correct C estimations: 0.86
Proportion of correct Z estimations: 0.70


In [64]:
training_data = np.genfromtxt("../../data/Ex_1.csv", delimiter="," ,dtype=int)[1:, 1:]

original_marginals_c, original_marginals_z = hmm.nielslief_propagation(training_data)

check_correctness(original_marginals_c, original_marginals_z, learned_hmm)

ValueError: operands could not be broadcast together with shapes (100,) (1000,) 

## Learning just from $\textbf{X}$ (full learning)

Compute $\hat{Z}_{t,i} = \argmax_z P(Z_{t,i} = z | \textbf{X} = \textbf{x})$ and $\hat{C}_t = \argmax_z P(C_t = z | \textbf{X} = \textbf{x})$

In [None]:
# Whatever. We're just using some joint-prob, taking from above. :)
z_hat, c_hat = hmm.nielslief_propagation(training_data)

In [None]:
c_hat

array([[[       nan,        nan],
        [       nan,        nan],
        [       nan,        nan],
        ...,
        [       nan,        nan],
        [       nan,        nan],
        [       nan,        nan]],

       [[0.94585607, 0.05414393],
        [0.87863392, 0.12136608],
        [0.93165557, 0.06834443],
        ...,
        [0.90930144, 0.09069856],
        [0.85394233, 0.14605767],
        [0.87863392, 0.12136608]],

       [[0.8707311 , 0.1292689 ],
        [0.84702603, 0.15297397],
        [0.8707311 , 0.1292689 ],
        ...,
        [0.84702603, 0.15297397],
        [0.91021538, 0.08978462],
        [0.90176947, 0.09823053]],

       ...,

       [[0.89397903, 0.10602097],
        [0.86871493, 0.13128507],
        [0.95272963, 0.04727037],
        ...,
        [0.86871493, 0.13128507],
        [0.89362327, 0.10637673],
        [0.88826423, 0.11173577]],

       [[0.87471602, 0.12528398],
        [0.87471602, 0.12528398],
        [0.8574381 , 0.1425619 ],
        .

Learning ...

In [None]:
epochs: int = 10 # lol.

In [None]:
hmm = HMM(transition_matrix, alpha, processing_modes=[0, 1, 2], rates=rates)

In [None]:
for _ in range(epochs):
    joint_prob = hmm.infer(observations)
    z_hat, c_hat = expectation_maximisation_hard_assignment(joint_prob, num_nodes=num_nodes)

    (
        lambda_0_hat,
        lambda_1_hat,
        learned_alpha,
        learned_beta,
        learned_gamma
    ) = learn_parameters_everything_observed(
        c_hat,
        z_hat,
        observations[:-1]
    )

    learned_rates = [lambda_0_hat, lambda_1_hat]
    learned_transition_matrix = np.array(
        [[1 - learned_gamma, 0, learned_gamma],
        [0, 1 - learned_gamma, learned_gamma],
        [learned_beta / 2, learned_beta / 2, 1 - learned_beta]]
    )

    hmm = HMM(learned_transition_matrix, alpha=learned_alpha, processing_modes=hmm.states, rates=learned_rates)


AttributeError: 'HMM' object has no attribute 'infer'

In [None]:
learned_joint_prob = hmm.infer(observations)

: 

In [None]:
marginal_prob_C = np.sum(learned_joint_prob, axis=2)

estimated_C = np.argmax(marginal_prob_C, axis=1)
estimated_Z = np.zeros((time_steps, num_nodes), dtype=int)

for t, c in enumerate(estimated_C):
    estimated_Z[t] = hmm.sample_hidden_z(num_nodes, c)

correct_C = np.sum(np.equal(estimated_C, true_processing_modes[:-1])) / (time_steps - 1)
correct_Z = np.sum(estimated_Z == true_focus) / ((time_steps - 1) * num_nodes)

print(f"Proportion of correct C estimations: {correct_C:.2f}")
print(f"Proportion of correct Z estimations: {correct_Z:.2f}")

: 

: 