This is an attempt to recreate the parameter estimation [example](https://sites.engineering.ucsb.edu/~jbraw/chemreacfun/fig-html/appendix/fig-A-10.html) from James Rawlings' book on [Reactor Design](https://sites.engineering.ucsb.edu/~jbraw/chemreacfun/) using Pyomo [parmest](https://pyomo.readthedocs.io/en/stable/contributed_packages/parmest/driver.html). This notebook also tests a few scenarios to investigate why the covariance matrix has negative diagonal elements and why a "Factor is exactly singular" runtime error appears.

In [20]:
# Import libraries
from pyomo.environ import *
from pyomo.dae import *
import pyomo.contrib.parmest.parmest as parmest
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pyomo.contrib.interior_point.inverse_reduced_hessian import inv_reduced_hessian_barrier

This example has a series reaction $A \rightarrow B \rightarrow C$. The dataset consists of measured concentrations of A, B and C over time. The goal is to estimate the rate constants $k_1$ and $k_2$ for the two reactions.

In [2]:
# Load the measurement table (columns t, ca, cb, cc) and preview its first rows
data_df = pd.read_csv("ABC_data.csv")
data_df.head()

Unnamed: 0,t,ca,cb,cc
0,0.0,0.957,-0.031,-0.015
1,0.263,0.557,0.33,0.044
2,0.526,0.342,0.512,0.156
3,0.789,0.224,0.499,0.31
4,1.053,0.123,0.428,0.454


In [3]:
# Build the parmest data list: a single entry holding one {time: value}
# dictionary per measured species
data = [
    {
        'ca_meas': dict(zip(data_df.t, data_df.ca)),
        'cb_meas': dict(zip(data_df.t, data_df.cb)),
        'cc_meas': dict(zip(data_df.t, data_df.cc)),
    }
]

In [4]:
#
# Define the model 
#
def ABC_model(data):
    """Build the discretized Pyomo model of the series reaction A -> B -> C.

    The initial conditions are imposed as equality constraints (``initcon``)
    on concentration variables that are bounded by (0, ca0) at every time
    point, including the first one.

    Args:
        data: mapping with the keys 'ca_meas', 'cb_meas' and 'cc_meas'. Each
            value is indexed by measurement time; the keys of 'ca_meas' are
            used as the measurement times for all three species.

    Returns:
        The ConcreteModel after the 'dae.collocation' transformation
        (nfe=20, ncp=2) has been applied. It holds the rate constants k1 and
        k2, the concentration profiles ca, cb and cc over ``time``, the
        FirstStageCost / SecondStageCost expressions and the
        Total_Cost_Objective that minimizes their sum.
    """
    meas_a = data['ca_meas']
    meas_b = data['cb_meas']
    meas_c = data['cc_meas']

    sample_times = list(meas_a.keys())

    # Initial concentrations; ca0 is also the upper bound of every species.
    ca0 = 1.0
    cb0 = 0.0
    cc0 = 0.0

    m = ConcreteModel()

    # Rate constants to be estimated.
    m.k1 = Var(initialize=0.5, bounds=(1e-4, 10))
    m.k2 = Var(initialize=3.0, bounds=(1e-4, 10))

    # Time horizon seeded with the measurement times.
    m.time = ContinuousSet(bounds=(0.0, 5.0), initialize=sample_times)
    m.ca = Var(m.time, initialize=ca0, bounds=(0, ca0))
    m.cb = Var(m.time, initialize=cb0, bounds=(0, ca0))
    m.cc = Var(m.time, initialize=cc0, bounds=(0, ca0))

    m.dca = DerivativeVar(m.ca, wrt=m.time)
    m.dcb = DerivativeVar(m.cb, wrt=m.time)
    m.dcc = DerivativeVar(m.cc, wrt=m.time)

    # Rate equations; each one is skipped at t = 0.
    def rate_of_a(model, t):
        if t == 0:
            return Constraint.Skip
        return model.dca[t] == -model.k1 * model.ca[t]
    m.dcarate = Constraint(m.time, rule=rate_of_a)

    def rate_of_b(model, t):
        if t == 0:
            return Constraint.Skip
        return model.dcb[t] == model.k1 * model.ca[t] - model.k2 * model.cb[t]
    m.dcbrate = Constraint(m.time, rule=rate_of_b)

    def rate_of_c(model, t):
        if t == 0:
            return Constraint.Skip
        return model.dcc[t] == model.k2 * model.cb[t]
    m.dccrate = Constraint(m.time, rule=rate_of_c)

    # Initial conditions as equality constraints at the first time point.
    def initial_conditions(model):
        t_first = model.time.first()
        yield model.ca[t_first] == ca0
        yield model.cb[t_first] == cb0
        yield model.cc[t_first] == cc0
    m.initcon = ConstraintList(rule=initial_conditions)

    # There is no first-stage cost in this problem.
    def first_stage_cost(model):
        return 0
    m.FirstStageCost = Expression(rule=first_stage_cost)

    # Sum of squared deviations from the measurements of all three species.
    def second_stage_cost(model):
        return sum(
            (model.ca[t] - meas_a[t]) ** 2
            + (model.cb[t] - meas_b[t]) ** 2
            + (model.cc[t] - meas_c[t]) ** 2
            for t in sample_times
        )
    m.SecondStageCost = Expression(rule=second_stage_cost)

    def total_cost(model):
        return model.FirstStageCost + model.SecondStageCost
    m.Total_Cost_Objective = Objective(rule=total_cost, sense=minimize)

    # Discretize the time domain by collocation.
    collocation = TransformationFactory('dae.collocation')
    collocation.apply_to(m, nfe=20, ncp=2)

    return m

In [5]:
# Names of the model variables (m.k1 and m.k2) that are passed to parmest as
# the parameters to be estimated
theta_names = ['k1', 'k2']

In [8]:
# Running this model ends in "RuntimeError: Factor is exactly singular"
# (see the traceback below).
# NOTE(review): the last traceback line divides by (primals_ub - primals).
# Presumably the bounded variables that the initial-condition constraints pin
# onto a bound at t = 0 (ca = 1.0 on its upper bound, cb = cc = 0.0 on their
# lower bound) make such a difference zero -- confirm.
pest = parmest.Estimator(ABC_model, data, theta_names, tee = True)
res = pest.theta_est(calc_cov = True)

Ipopt 3.14.5: bound_relax_factor=0
honor_original_bounds=no


******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

This is Ipopt version 3.14.5, running with linear solver ma27.

Number of nonzeros in equality constraint Jacobian...:      923
Number of nonzeros in inequality constraint Jacobian.:        0
Number of nonzeros in Lagrangian Hessian.............:      140

Total number of variables............................:      245
                     variables with only lower bounds:        0
                variables with lower and upper bounds:      125
                     variables with only upper bounds:        0
Total number of equality constraints...

  duals_primals_ub/(self._nlp.primals_ub() - primals))


RuntimeError: Factor is exactly singular

In [12]:
#
# Define the model where initial conditions constraints are replaced by fixing variables for ca, cb, cc at time 0
# but retaining bounds on ca, cb, cc for remaining time points
#
def ABC_model_fixinit(data):
    """Build the A -> B -> C model with the initial conditions imposed by fixing.

    No initial-condition constraints are added. After discretization, ca, cb
    and cc are fixed to their initial values at t = 0, while every other time
    point receives an initial guess and the bounds (0.0, ca0).

    Args:
        data: mapping with the keys 'ca_meas', 'cb_meas' and 'cc_meas'. Each
            value is indexed by measurement time; the keys of 'ca_meas' are
            used as the measurement times for all three species.

    Returns:
        The ConcreteModel after the 'dae.collocation' transformation
        (nfe=20, ncp=2) has been applied and the concentration variables have
        been fixed / initialized / bounded as described above.
    """
    meas_a = data['ca_meas']
    meas_b = data['cb_meas']
    meas_c = data['cc_meas']

    sample_times = list(meas_a.keys())

    # Initial concentrations; ca0 is also the upper bound of every species.
    ca0 = 1.0
    cb0 = 0.0
    cc0 = 0.0

    m = ConcreteModel()

    # Rate constants to be estimated.
    m.k1 = Var(initialize=0.5, bounds=(1e-4, 10))
    m.k2 = Var(initialize=3.0, bounds=(1e-4, 10))

    m.time = ContinuousSet(bounds=(0.0, 5.0), initialize=sample_times)
    # Values and bounds of the concentrations are assigned after discretization.
    m.ca = Var(m.time)
    m.cb = Var(m.time)
    m.cc = Var(m.time)

    m.dca = DerivativeVar(m.ca, wrt=m.time)
    m.dcb = DerivativeVar(m.cb, wrt=m.time)
    m.dcc = DerivativeVar(m.cc, wrt=m.time)

    # Rate equations; each one is skipped at t = 0.
    def rate_of_a(model, t):
        if t == 0:
            return Constraint.Skip
        return model.dca[t] == -model.k1 * model.ca[t]
    m.dcarate = Constraint(m.time, rule=rate_of_a)

    def rate_of_b(model, t):
        if t == 0:
            return Constraint.Skip
        return model.dcb[t] == model.k1 * model.ca[t] - model.k2 * model.cb[t]
    m.dcbrate = Constraint(m.time, rule=rate_of_b)

    def rate_of_c(model, t):
        if t == 0:
            return Constraint.Skip
        return model.dcc[t] == model.k2 * model.cb[t]
    m.dccrate = Constraint(m.time, rule=rate_of_c)

    # Deliberately no `initcon` ConstraintList here: the initial conditions
    # are imposed further down by fixing the variables at t = 0.

    # There is no first-stage cost in this problem.
    def first_stage_cost(model):
        return 0
    m.FirstStageCost = Expression(rule=first_stage_cost)

    # Sum of squared deviations from the measurements of all three species.
    def second_stage_cost(model):
        return sum(
            (model.ca[t] - meas_a[t]) ** 2
            + (model.cb[t] - meas_b[t]) ** 2
            + (model.cc[t] - meas_c[t]) ** 2
            for t in sample_times
        )
    m.SecondStageCost = Expression(rule=second_stage_cost)

    def total_cost(model):
        return model.FirstStageCost + model.SecondStageCost
    m.Total_Cost_Objective = Objective(rule=total_cost, sense=minimize)

    # Discretize the time domain by collocation.
    collocation = TransformationFactory('dae.collocation')
    collocation.apply_to(m, nfe=20, ncp=2)

    # Fix the concentrations at t = 0 to the initial conditions; at all other
    # (discretized) time points set an initial guess and the bounds (0, ca0).
    species_start = ((m.ca, ca0), (m.cb, cb0), (m.cc, cc0))
    for t in m.time:
        for conc, start in species_start:
            if t == 0:
                conc[t].fix(start)
                continue
            conc[t] = start
            conc[t].setlb(0.0)
            conc[t].setub(ca0)

    return m

In [13]:
# Estimate k1 and k2 with the model that fixes the initial conditions, and
# request the covariance matrix of the estimates
pest = parmest.Estimator(ABC_model_fixinit, data, theta_names, tee = True)
res = pest.theta_est(calc_cov = True)

Ipopt 3.14.5: bound_relax_factor=0
honor_original_bounds=no


******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

This is Ipopt version 3.14.5, running with linear solver ma27.

Number of nonzeros in equality constraint Jacobian...:      914
Number of nonzeros in inequality constraint Jacobian.:        0
Number of nonzeros in Lagrangian Hessian.............:      137

Total number of variables............................:      242
                     variables with only lower bounds:        0
                variables with lower and upper bounds:      122
                     variables with only upper bounds:        0
Total number of equality constraints...

In [18]:
# Unpack the estimation result into objective value, parameter estimates and
# covariance matrix, then print each of them
obj, theta, cov = res
print('Objective: ', obj)
print('Parameters: ', theta)
print('Covariance matrix')
print(cov)

Objective:  0.02612145403078627
Parameters:  k1    2.014382
k2    0.994455
dtype: float64
Covariance matrix
          k1        k2
k1 -0.132001  0.020262
k2  0.020262 -0.018095


The covariance matrix is now calculated, but its diagonal elements are negative. This is not caused by the calculation of the inverse reduced Hessian; it is caused by an incorrect value for the number of data points (taken to be 1 in the above calculation, since all the data is in a single-element list holding one dictionary). The above covariance result is reproduced below using the `inv_reduced_hessian_barrier` function.

In [21]:
# Build the model directly from the single data dictionary and compute the
# inverse reduced Hessian with k1 and k2 as the independent variables
m = ABC_model_fixinit(data[0])
solve_result, inv_red_hes = inv_reduced_hessian_barrier(m, 
                    independent_variables= [m.k1, m.k2],
                    tee=True)

Ipopt 3.14.5: bound_relax_factor=0
honor_original_bounds=no


******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

This is Ipopt version 3.14.5, running with linear solver ma27.

Number of nonzeros in equality constraint Jacobian...:      914
Number of nonzeros in inequality constraint Jacobian.:        0
Number of nonzeros in Lagrangian Hessian.............:      137

Total number of variables............................:      242
                     variables with only lower bounds:        0
                variables with lower and upper bounds:      122
                     variables with only upper bounds:        0
Total number of equality constraints...

In [23]:
# Display the inverse reduced Hessian for the independent variables (k1, k2)
inv_red_hes # diagonals are positive

array([[ 2.52668023, -0.38784893],
       [-0.38784893,  0.34636443]])

In [24]:
# reproduces cov calculation from parmest when using n = 1
n = 1  # data count when only the single entry of the `data` list is counted
p = 2  # number of estimated parameters (k1, k2)
sse = m.Total_Cost_Objective()  # objective evaluated at the model's current variable values
cov_est = 2 * sse / (n - p) * inv_red_hes  # n - p = -1 flips the sign of every entry
cov_est

array([[-0.13200112,  0.02026236],
       [ 0.02026236, -0.01809509]])

In [25]:
# The covariance calculation should use the number of measurements that enter
# the objective -- one squared residual per species per measurement time,
# which is n = 60 for this dataset -- rather than the number of entries in
# the `data` list. Counting them from the data keeps n correct if the dataset
# changes.
n = sum(len(measurements) for measurements in data[0].values())
p = 2  # number of estimated parameters (k1, k2)
sse = m.Total_Cost_Objective()  # objective evaluated at the model's current variable values
cov_est2 = 2 * sse / (n - p) * inv_red_hes
cov_est2

array([[ 0.00227588, -0.00034935],
       [-0.00034935,  0.00031198]])