prokaryotic_no_sum.py
import torch
import torch.nn.functional as F
from sbi.inference import SNPE, AALR, SNLE, prepare_for_sbi, simulate_for_sbi, SMCABC
from sbi.utils.get_nn_models import posterior_nn, classifier_nn
from torch.distributions import Gamma
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pickle
import time as timer
import os
import argparse
from models.pky_simulator import PKYSim
from CPP import pkyssa
from inference.infer import BootStrappSMC as SMC
from inference.infer import trainer_ide, sampler_ide, compute_mse_and_coverage
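
# One round of sequential ratio estimation: append the newly simulated
# (theta, x) pairs to the AALR inference object, retrain the classifier
# (passing the current proposal after the first round), and build the
# updated posterior from the trained density(-ratio) estimator.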
def sequential_estimation(theta,
                          noisy_x,
                          inference,
                          proposal=None,
                          first_round=False,
                          batch_size=256,
                          ds_factor=5,
                          ):
    if first_round:
        density_estimator = inference.append_simulations(
            theta, noisy_x).train(training_batch_size=batch_size)
    else:
        density_estimator = inference.append_simulations(
            theta, noisy_x, proposal).train(training_batch_size=batch_size)
    posterior = inference.build_posterior(density_estimator)
    return posterior, inference
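
# Metrics for a set of sampled state paths: MSE and coverage against the
# ground-truth path x_gen, the mean coefficient of variation across the
# sampled paths, and the same three quantities for posterior-predictive
# observations (P + 2*P2 plus Gaussian noise) compared against the data Y.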
def mse_covg_var(Y, x_ide_, x_gen, noise_std):
    mse_, covg_ = compute_mse_and_coverage(x_gen, x_ide_)
    var_ = np.mean(x_ide_.std(axis=0) / x_ide_.mean(axis=0))
    # Posterior-predictive observations: P + 2*P2 plus Gaussian measurement noise.
    ppc_ = (x_ide_[..., 1] + 2*x_ide_[..., 2]) + np.random.randn(*x_ide_.shape[:2])*noise_std
    mse_ppc_, covg_ppc_ = compute_mse_and_coverage(Y, ppc_)
    var_ppc_ = np.mean(ppc_.std(axis=0) / ppc_.mean(axis=0))
    return mse_, covg_, var_, mse_ppc_, covg_ppc_, var_ppc_
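
# Learned summary statistic: a two-layer LSTM reads the noisy series as a
# 100-step, 1-feature sequence; its final hidden state is mapped through a
# ReLU-activated linear layer to an 8-dimensional embedding, which the AALR
# classifier conditions on in place of hand-crafted summaries.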
class SummaryNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = torch.nn.LSTM(1, 10, num_layers=2, batch_first=True)
        self.fc = torch.nn.Linear(in_features=10, out_features=8)

    def forward(self, x):
        x = x.view(-1, 100, 1)
        output, (final_hidden_state, final_cell_state) = self.lstm(x)
        x = F.relu(self.fc(final_hidden_state[-1].squeeze())).view(-1, 8)
        return x
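
# Experiment driver: simulate noisy observations from the prokaryotic
# autoregulatory network, run multi-round AALR with the LSTM embedding on the
# raw series, then reconstruct posterior sample paths with the IDE sampler, a
# bootstrap SMC baseline, and prior-dynamics simulations, and save metrics.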
def main(args):
    id_number = 1

    # arguments
    num_rounds = args.num_rounds        # number of sequential rounds
    num_sim_init = args.num_sim_init    # number of simulations in first round (used by IDE training)
    num_sim = args.num_sim              # number of simulations in subsequent rounds
    num_samples = args.num_samples      # number of posterior theta samples
    noise_std = 2
    time = 100
    init_vals = [8., 8., 8., 5.]
    K = 4
    d = 8

    ### Define priors, simulator, sbi ###
    _prior = [Gamma(torch.tensor([2.]), torch.tensor([3.])) for _ in range(d)]
    _simulator = PKYSim(simulate_summary=False)
    simulator, prior = prepare_for_sbi(_simulator, _prior)
    embedding_net = SummaryNet()
    classifier_net = classifier_nn(model='resnet', embedding_net_x=embedding_net)
    inference_re = AALR(prior=prior, classifier=classifier_net)
    posteriors_re = []

    # generate data: observe P + 2*P2 (matching the likelihood and PPC below) plus Gaussian noise
    gen_par = np.array([[0.1, 0.7, 0.35, 0.2, 0.1, 0.9, 0.3, 0.1]])
    x_gen = _simulator(gen_par).squeeze()
    Y = (x_gen[:, 1] + 2*x_gen[:, 2]) + np.random.randn(time).astype(np.float32)*noise_std
    param_filename = './data/'+str(id_number)+'pky_data.p'
    pickle.dump(Y, open(param_filename, 'wb'))
    proposal = prior

    # Downsample in the time dimension (kept from the summary-based variant; the
    # learned LSTM summary below conditions on the full-length series Y instead)
    ds_factor = 5
    y = Y[::ds_factor]
    # Run sequential neural inference
    for round in range(num_rounds):
        if round == 0:
            theta, x = simulate_for_sbi(simulator,
                                        proposal,
                                        num_simulations=num_sim_init,
                                        )
            x_ = x[..., 1] + 2*x[..., 2]
            noisy_x = x_ + np.random.randn(num_sim_init, time).astype(np.float32)*noise_std
            posterior_re, inference_re = sequential_estimation(theta,
                                                               noisy_x,
                                                               inference_re,
                                                               first_round=True,
                                                               )
            posteriors_re.append(posterior_re)
            proposal_re = posterior_re.set_default_x(Y)
        else:
            theta_re, x_re = simulate_for_sbi(simulator,
                                              proposal_re,
                                              num_simulations=num_sim,
                                              )
            x_re_ = x_re[..., 1] + 2*x_re[..., 2]
            noisy_x_re = x_re_ + np.random.randn(num_sim, time).astype(np.float32)*noise_std
            posterior_re, inference_re = sequential_estimation(theta_re,
                                                               noisy_x_re,
                                                               inference_re,
                                                               proposal=proposal_re,
                                                               )
            posteriors_re.append(posterior_re)
            # condition on the full-length observed series Y (the LSTM summary expects 100 steps)
            proposal_re = posterior_re.set_default_x(Y)
        print('Num rounds finished: ', round)
    # Draw posterior parameter samples conditioned on the observed series Y
    params_re = posteriors_re[-1].sample((num_samples,), x=Y).cpu().numpy()
    param_filename = './results/sre/'+str(id_number)+'pky_params_nosum.p'
    pickle.dump(params_re, open(param_filename, 'wb'))
    # Generate posterior sample path using IDE
    ide_importance, ide_corrector = trainer_ide(x.detach().numpy(),
                                                noisy_x.detach().numpy().reshape((-1, time, 1)),
                                                time,
                                                theta=theta.detach().numpy(),
                                                )
    x_ide_re = sampler_ide(Y.reshape((-1, 1)), init_vals, params_re, ide_importance, ide_corrector)
    param_filename = './results/sre/'+str(id_number)+'pky_paths_re_nosum.p'
    pickle.dump(x_ide_re, open(param_filename, 'wb'))

    # Generate posterior sample path using SMC (Baseline)
    times = np.arange(0, time, 1)
    likelihood = lambda x, y, std: stats.norm(x[:, 1] + 2*x[:, 2], std).logpdf(y)
    smc_re = np.zeros((num_samples, len(times), K))
    for i in range(num_samples):
        params = tuple((np.array(params_re[i, :]), noise_std))
        smc_re[i, :] = SMC(pkyssa, Y, init_vals, params, 100, times, likelihood)
    param_filename = './results/smc/'+str(id_number)+'pky_paths_smcre.p'
    pickle.dump(smc_re, open(param_filename, 'wb'))
    # Generate sample paths from Prior Dynamics
    prdyn_re = _simulator(params_re)

    # Calculate MSE, coverage and coefficient of variation for the IDE, SMC and
    # prior-dynamics sample paths (latent space and posterior predictive)
    mse_re, covg_re, var_re, mse_ppc_re, covg_ppc_re, var_ppc_re = mse_covg_var(Y, x_ide_re, x_gen, noise_std)
    mse_smcre, covg_smcre, var_smcre, mse_ppc_smcre, covg_ppc_smcre, var_ppc_smcre = mse_covg_var(Y, smc_re, x_gen, noise_std)
    mse_prre, covg_prre, var_prre, mse_ppc_prre, covg_ppc_prre, var_ppc_prre = mse_covg_var(Y, prdyn_re, x_gen, noise_std)
    metrics_x = {
        'method': ['IDE_SRE', 'SMC_SRE', 'PrDyn_SRE'],
        'mse': [mse_re, mse_smcre, mse_prre],
        'covg': [covg_re, covg_smcre, covg_prre],
        'var': [var_re, var_smcre, var_prre],
    }
    metrics_ppc = {
        'method': ['IDE_SRE', 'SMC_SRE', 'PrDyn_SRE'],
        'mse': [mse_ppc_re, mse_ppc_smcre, mse_ppc_prre],
        'covg': [covg_ppc_re, covg_ppc_smcre, covg_ppc_prre],
        'var': [var_ppc_re, var_ppc_smcre, var_ppc_prre],
    }
    metrics_x_df = pd.DataFrame.from_dict(metrics_x)
    metrics_ppc_df = pd.DataFrame.from_dict(metrics_ppc)
    metrics_x_df.to_csv('./results/pky_metrics_x_nosum.csv')
    metrics_ppc_df.to_csv('./results/pky_metrics_ppc_nosum.csv')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Neural LFI for the Prokaryotic autoregulator (Pky) model, with LSTM summary net')
    parser.add_argument('--num_rounds', type=int, default=3, metavar='N',
                        help='number of sequential rounds')
    parser.add_argument('--num_sim_init', type=int, default=100, metavar='N',
                        help='number of simulations in first round (used by IDE training)')
    parser.add_argument('--num_sim', type=int, default=100, metavar='N',
                        help='number of simulations in subsequent rounds')
    parser.add_argument('--num_samples', type=int, default=50, metavar='N',
                        help='number of posterior theta samples')
    args = parser.parse_args()
    main(args)
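
# Example invocation (a sketch, assuming the local packages `models`, `CPP` and
# `inference` are importable and the ./data, ./results/sre and ./results/smc
# directories already exist):
#
#     python prokaryotic_no_sum.py --num_rounds 3 --num_sim_init 100 \
#         --num_sim 100 --num_samples 50
#
# Results: pickled parameter samples and sample paths under ./results/sre and
# ./results/smc, and metric tables pky_metrics_x_nosum.csv /
# pky_metrics_ppc_nosum.csv under ./results.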