# Experiments on backward variational ICA 

***Uncomment and run the following cell if you're using Google Colab***

In [None]:
# Colab-only bootstrap: fetches the project sources into the current working directory.
# WARNING: the first command deletes EVERYTHING in the current working directory;
# only uncomment this cell in a throwaway environment, never on a local checkout.
# Steps: wipe the working directory, clone the repository, move its contents up
# one level, then remove the now-empty clone directory.
# !rm -rf *
# !git clone https://github.com/mchagneux/backward_ica.git
# !mv backward_ica/* ./
# !rm -rf backward_ica

### Imports

In [1]:
from functools import partial
from src.eval import mse_expectation_against_true_states
from src.kalman import Kalman, NumpyKalman
from src.hmm import AdditiveGaussianHMM, LinearGaussianHMM
from src.elbo import LinearGaussianELBO
import torch
from tqdm import tqdm
# Run everything in double precision so the torch and numpy Kalman
# implementations can be compared with small numerical error.
# `torch.set_default_tensor_type(torch.DoubleTensor)` was dropped: it is
# deprecated in recent PyTorch releases and, on CPU, redundant with the call below.
torch.set_default_dtype(torch.float64)
# torch.set_printoptions(precision=10)

## sanity checks
# Compute the same quantity (named "likelihood" below) three ways on one sampled
# sequence: torch Kalman filter, numpy Kalman filter, and the ELBO evaluated with
# the variational model set to the true model.
hmm = LinearGaussianHMM(state_dim=2, obs_dim=2)
states, observations = hmm.sample_joint_sequence(10)

# Freeze the model parameters.
for param in hmm.model.parameters():
    param.requires_grad = False

# Kalman filter written with torch operators; the value of interest is at index 4.
torch_filter_outputs = Kalman(hmm.model).filter(observations)
likelihood_torch = torch_filter_outputs[4]

# Kalman filter written with numpy operators; the value of interest is at index 2.
numpy_filter_outputs = NumpyKalman(hmm.model).filter(observations.numpy())
likelihood_numpy = numpy_filter_outputs[2]

# ELBO with the same model passed as both arguments.
self_elbo = LinearGaussianELBO(hmm.model, hmm.model)
likelihood_via_elbo = self_elbo(observations)

# Both differences should be close to 0.
print(likelihood_numpy - likelihood_torch)
print(likelihood_numpy - likelihood_via_elbo)



TypeError: object.__init__() takes exactly one argument (the instance to initialize)

### 1. $p$ and $q$ are both linear Gaussian HMMs

In [2]:
# True model p: a linear Gaussian HMM whose parameters are frozen
# (theta is not learned for now).
hmm = LinearGaussianHMM(state_dim=2, obs_dim=2)
for param in hmm.model.parameters():
    param.requires_grad = False

# Draw 10 joint samples from the HMM; each sample is a (states, observations) pair.
samples = [hmm.sample_joint_sequence(8) for _ in range(10)]
state_sequences = [sample[0] for sample in samples]
observation_sequences = [sample[1] for sample in samples]

# Variational model q: a random linear Gaussian HMM with the same dimensions.
# Its covariances are not learned for now.
v_model = LinearGaussianHMM.get_random_model(2, 2)
v_model.prior.parametrizations.cov.original.requires_grad = False
v_model.transition.parametrizations.cov.original.requires_grad = False
v_model.emission.parametrizations.cov.original.requires_grad = False

# The ELBO object takes p and q as arguments.
elbo = LinearGaussianELBO(hmm.model, v_model)

# Optimize the parameters of the ELBO (those of theta were deactivated above).
optimizer = torch.optim.Adam(params=elbo.parameters(), lr=1e-2)

# Reference value: evidence from the Kalman filter under the true model,
# summed over all observation sequences.
true_evidence_all_sequences = sum(
    Kalman(hmm.model).filter(observations)[-1]
    for observations in observation_sequences
)

print('True evidence accross all sequences:', true_evidence_all_sequences)

# Stopping rule: gap below TOLERANCE, or MAX_EPOCHS reached. The cap is needed
# because the gap is not guaranteed to fall below the tolerance (e.g. with the
# variational covariances frozen), which would otherwise loop forever.
TOLERANCE = 0.1
MAX_EPOCHS = 1000

eps = torch.inf
epoch = 0
while eps > TOLERANCE and epoch < MAX_EPOCHS:
    # Despite its name, this accumulates the ELBO (i.e. -loss) over the epoch.
    epoch_loss = 0.0
    for observations in observation_sequences:
        optimizer.zero_grad()
        loss = -elbo(observations)
        loss.backward()
        optimizer.step()
        # Detach so the running sum does not keep autograd history alive.
        epoch_loss += -loss.detach()
    # Absolute gap between the summed ELBO and the summed true evidence.
    eps = torch.abs(true_evidence_all_sequences - epoch_loss)
    print('Absolute gap |sum L(theta, phi) - sum log(p_theta(x))|:', eps)
    epoch += 1

if eps > TOLERANCE:
    print('Stopped after', MAX_EPOCHS, 'epochs without reaching the tolerance; remaining gap:', eps)

True evidence accross all sequences: tensor(315.0129)
Average of "L(theta, phi) - log(p_theta(x))": tensor(5486.3074)
Average of "L(theta, phi) - log(p_theta(x))": tensor(1067.9205)
Average of "L(theta, phi) - log(p_theta(x))": tensor(417.1209)
Average of "L(theta, phi) - log(p_theta(x))": tensor(307.6697)
Average of "L(theta, phi) - log(p_theta(x))": tensor(225.8647)
Average of "L(theta, phi) - log(p_theta(x))": tensor(182.7485)
Average of "L(theta, phi) - log(p_theta(x))": tensor(144.9683)
Average of "L(theta, phi) - log(p_theta(x))": tensor(118.7143)
Average of "L(theta, phi) - log(p_theta(x))": tensor(98.9533)
Average of "L(theta, phi) - log(p_theta(x))": tensor(82.5006)
Average of "L(theta, phi) - log(p_theta(x))": tensor(69.4036)
Average of "L(theta, phi) - log(p_theta(x))": tensor(59.2476)
Average of "L(theta, phi) - log(p_theta(x))": tensor(51.2994)
Average of "L(theta, phi) - log(p_theta(x))": tensor(45.1739)
Average of "L(theta, phi) - log(p_theta(x))": tensor(40.4563)
Averag

KeyboardInterrupt: 

In [3]:
# Compare the true and the variational model on the same sequences using
# mse_expectation_against_true_states, with a sum over dim 0 as the functional.
additive_functional = partial(torch.sum, dim=0)


def mse_for_model(model):
    """Evaluate mse_expectation_against_true_states for `model` on the sampled sequences."""
    return mse_expectation_against_true_states(
        state_sequences, observation_sequences, model, additive_functional
    )


# Pure evaluation: no gradients are needed.
with torch.no_grad():
    smoothed_with_true_model = mse_for_model(hmm.model)
    smoothed_with_approximate_model = mse_for_model(v_model)

print('MSE when smoothed with true model:',smoothed_with_true_model)
print('MSE when smoothed with variational model:',smoothed_with_approximate_model)

Expectation when smooth with true model: tensor(0.0120)
Expectation when smooth with variational model: tensor(0.0125)


### 2. $p$ has a nonlinear emission model $x_t = f_\theta(z_t) + \epsilon_t$, but $q$ is still a linear-Gaussian HMM