In [None]:
import numpy as np

##################################################
##### Matplotlib boilerplate for consistency #####
##################################################
from ipywidgets import interact
from ipywidgets import FloatSlider
from matplotlib import pyplot as plt

%matplotlib inline

from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg')

# Base dimensions shared by all figures: the height is derived from the width
# so that width / height equals the golden ratio.
font_size = 12
global_fig_width = 8
global_fig_height = global_fig_width / 1.61803399

# Apply all style overrides in one call; each entry is equivalent to assigning
# plt.rcParams[key] = value.
plt.rcParams.update({
    # Axes: light grey frame, grid drawn underneath the data
    'axes.axisbelow': True,
    'axes.edgecolor': '0.8',
    'axes.grid': True,
    'axes.labelpad': 8,
    'axes.linewidth': 2,
    'axes.titlepad': 16.0,
    'axes.titlesize': font_size * 1.4,
    # Figure and fonts
    'figure.figsize': (global_fig_width, global_fig_height),
    'font.sans-serif': ['Computer Modern Sans Serif', 'DejaVu Sans', 'sans-serif'],
    'font.size': font_size,
    # Grid lines
    'grid.color': '0.8',
    'grid.linestyle': 'dashed',
    'grid.linewidth': 2,
    # Dashed-line appearance
    'lines.dash_capstyle': 'round',
    'lines.dashed_pattern': [1, 4],
    # Ticks: labels only, no tick marks (major tick size is zero)
    'xtick.labelsize': font_size,
    'xtick.major.pad': 4,
    'xtick.major.size': 0,
    'ytick.labelsize': font_size,
    'ytick.major.pad': 4,
    'ytick.major.size': 0,
})
##################################################

# Likelihood

- Recall the likelihood appears in Bayes' Theorem 

$$P(\theta | data) = \frac{\color{red}{{P(data|\theta)}} P(\theta)}{P(data)}$$

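- For any fixed $\theta$, $P(data|\theta)$ is a valid probability distribution over the possible data
- If instead the data are fixed and $\theta$ is treated as the unknown, then $P(data|\theta)$, viewed as a function of $\theta$, is *not* a valid probability distribution: it need not integrate to one. For example, for a single coin flip that lands heads, $P(H|\theta) = \theta$ and $\int_0^1 \theta \, d\theta = \frac{1}{2} \neq 1$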

- For variable $\theta$, we therefore tend to introduce the term *likelihood* to describe $P(data|\theta)$

- A notation often seen in the literature is

$$\mathcal{L}(\theta | data) = P(data | \theta)$$

Therefore, a likelihood of $\theta$ for a particular data sample is equivalent to the probability of that data sample for that value of $\theta$. We call the above the *equivalence relation*.




# Choosing an appropriate likelihood

- Likelihood is based on your model. This could be a purely statistical model, for example our coin toss example:

![](fig/coin.jpeg)

- Back to the coin example. We perform an experiment of $n$ flips, and it lands heads up $h$ times

- Likelihood is:

$$P(h\times H | \theta) = \theta^h (1-\theta)^{n-h}$$

# Example: disease prevalence of a group



# Example: ODE-based model

- In science, these models often take the form of differential equations
- Let's consider the case of an ordinary differential equation (ODE) model with a time-dependent solution

$$\frac{dy(t)}{dt} = f(y(t), t; \theta), \qquad y(0) = y_0$$
Science has a long tradition of modelling

# Maximum Likelihood Estimation

- In the previous lecture we worked out the posterior *distribution* of a given parameter, given the available data
- Often only interested in the most *likely* parameter value
- Can use maximum likelihood estimation: simply find the value of $\theta$ that maximises the likelihood
- a **frequentist** approach, uses a likelihood function (not a valid probability distribution)

$$\theta_{mle} = \text{arg max}_{\theta \in \Omega} P(data|\theta)$$

# Coin example

![](fig/coin.jpeg)

- Back to the coin example. We perform an experiment of $n$ flips, and it lands heads up $h$ times

- Likelihood is:

$$P(h\times H | \theta) = \theta^h (1-\theta)^{n-h}$$

In [None]:
# Coin example

calculate derivative, set to zero

# Example using PINTS

- We will use a popular model of population growth, the logistic equation:

    $$ \frac{df(t)}{dt} = r f(t) \frac{k - f(t)}{k}$$
    $$f(t) = \frac{k}{1+(k/p_0 - 1) \exp(-r t)}$$
    
- Two parameters, the carrying capacity $k$, and the rate of growth $r$
- The `pints.GaussianLogLikelihood` in PINTS implements the independent Gaussian noise log-likelihood derived earlier

In [None]:
import pints
import pints.toy
import matplotlib.pyplot as plt
import numpy as np

# Seed NumPy's global random number generator so that the synthetic noise,
# and every estimate derived from it below, is identical after
# Restart Kernel -> Run All.
np.random.seed(1)

p0 = 1  # initial population; initial value
model = pints.toy.LogisticModel(p0)

# Define the 'true' parameters: the two model parameters (growth rate r and
# carrying capacity k) followed by the standard deviation of the noise
true_parameters = [0.1, 50, 5]

# Run a simulation to get noise-free test data at 100 time points in [0, 100]
times = np.linspace(0, 100, 100)
values = model.simulate(true_parameters[:-1], times)

# Add independent Gaussian noise with zero mean and the 'true' standard deviation
values = values + np.random.normal(0, true_parameters[-1], values.shape)


In [None]:
# Compare the noise-free model output (dashed line) with the noisy test data
fig, ax = plt.subplots()
ax.set_xlabel('Time')
ax.set_ylabel(r'Population $y(t)$')
ax.plot(times, model.simulate(true_parameters[:-1], times), linestyle='--')
ax.plot(times, values)
plt.show()

In [None]:
# Bundle the model, the sampling times and the noisy observations into a
# single-output inference problem
problem = pints.SingleOutputProblem(model, times, values)

# Gaussian log-likelihood for that problem; it is evaluated on three values
# (see `true_parameters`): the two model parameters followed by the noise level
log_likelihood = pints.GaussianLogLikelihood(problem)

# Search region: every parameter is confined to the interval [0, 100]
boundaries = pints.RectangularBoundaries([0, 0, 0], [100, 100, 100])

# Initial guess at the centre of the search region
x0 = [50, 50, 50]

# Run the XNES optimiser on the log-likelihood
found_parameters, found_value = pints.optimise(
    log_likelihood,
    x0,
    boundaries=boundaries,
    method=pints.XNES,
)

# For reference, report the log-likelihood at the data-generating parameters
print('log_likelihood at true solution:', log_likelihood(true_parameters), sep='\n')

In [None]:
# Plot the fitted curve over the noisy data, with a grey band of
# +/- one fitted noise standard deviation around the fit
found_mean = model.simulate(found_parameters[:-1], times)

fig, ax = plt.subplots()
ax.set_xlabel('Time')
ax.set_ylabel(r'Population $y(t)$')
ax.fill_between(
    times,
    found_mean - found_parameters[-1],
    found_mean + found_parameters[-1],
    color='gray',
    alpha=0.2,
)
ax.plot(times, found_mean)
ax.plot(times, values)
plt.show()

In [None]:
# Non-linear optimisation

In [None]:
# Maximum a posteriori estimation 


In [None]:
# coin example, gaussian prior

# Logistic growth example

Recall the logistic equation:
    
$$f(t) = \frac{k}{1+(k/p_0 - 1) \exp(-r t)}$$

- Anyone familiar with this equation could estimate a value of the carrying capacity $k$ from a plot
- Would be reasonable to therefore use a Gaussian Prior for $k$
  

In [None]:
# Noisy data with a horizontal black reference line at y = 50: a by-eye
# estimate of the carrying capacity k read off the plot
plt.plot(times, values)
plt.plot([0, 100], [50, 50], color='k')
plt.xlabel('Time')
plt.ylabel(r'Population $y(t)$')
plt.show()

In [None]:
# Gaussian log-likelihood for the logistic-growth problem
log_likelihood = pints.GaussianLogLikelihood(problem)

# One-dimensional priors, one per entry of the parameter vector
log_prior_r = pints.UniformLogPrior([0], [100])      # r: uniform on [0, 100]
log_prior_k = pints.GaussianLogPrior(50, 10)         # k: Gaussian, mean 50, standard deviation 10
log_prior_sigma = pints.UniformLogPrior([0], [100])  # sigma: uniform on [0, 100]

# Joint prior over (r, k, sigma), composed from the independent components
log_prior = pints.ComposedLogPrior(
    log_prior_r,
    log_prior_k,
    log_prior_sigma,
)

In [None]:
# Combine likelihood and prior into the (unnormalised) log-posterior,
# log(likelihood * prior)
log_posterior = pints.LogPosterior(log_likelihood, log_prior)

# Select some boundaries: every parameter is confined to [0, 100]
boundaries = pints.RectangularBoundaries([0, 0, 0], [100, 100, 100])

# Select a starting point
x0 = [50, 50, 50]

# Maximum a posteriori (MAP) estimate using Particle Swarm Optimisation (PSO).
# The objective must be the log-posterior: optimising the bare log-likelihood
# would ignore the prior and simply repeat the maximum likelihood estimate.
found_parameters, found_value = pints.optimise(
    log_posterior, x0, boundaries=boundaries, method=pints.PSO)
print('posterior log-likelihood at true solution:')
print(log_posterior(true_parameters))

In [None]:
# Plot the curve simulated from the optimiser's result over the noisy data,
# with a grey band of +/- one estimated noise standard deviation around it
found_mean = model.simulate(found_parameters[:-1], times)

fig, ax = plt.subplots()
ax.set_xlabel('Time')
ax.set_ylabel(r'Population $y(t)$')
ax.fill_between(
    times,
    found_mean - found_parameters[-1],
    found_mean + found_parameters[-1],
    color='gray',
    alpha=0.2,
)
ax.plot(times, found_mean)
ax.plot(times, values)
plt.show()

# Electrochemistry example - POMS

- three unresolved two-electron surface-confined polyoxometalate reduction processes by AC voltammetry

![](fig/pom.svg)
**(left)** Molecular structure of $[\text{PMo}_{12}\text{O}_{40}]^{3-}$    **(right)** Experimental AC voltammetry trace

- The sequence of six electron transfer steps is modelled by the following quasi-reversible reactions

\begin{align}
    A + e^- \underset{k^1_{ox}(t)}{\overset{k^1_{red}(t)}{\rightleftarrows}} B,
    \\
    B + e^- \underset{k^2_{ox}(t)}{\overset{k^2_{red}(t)}{\rightleftarrows}} C,
    \\
    C + e^- \underset{k^3_{ox}(t)}{\overset{k^3_{red}(t)}{\rightleftarrows}} D,
    \\
    D + e^- \underset{k^4_{ox}(t)}{\overset{k^4_{red}(t)}{\rightleftarrows}} E,
    \\
    E + e^- \underset{k^5_{ox}(t)}{\overset{k^5_{red}(t)}{\rightleftarrows}} F,
    \\
    F + e^-  \underset{k^6_{ox}(t)}{\overset{k^6_{red}(t)}{\rightleftarrows}} G,
\end{align}

where the forward $k_{red}$ and backwards $k_{ox}$ reaction rates are
given by
the Butler-Volmer
relationships

\begin{align}\label{eq:rate1}
    k^i_{red}(t) &= k^0_i \exp\left(-\frac{\alpha_i F}{RT} [E_r(t) - E^0_i]
    \right), \\
    k^i_{ox}(t) &= k^0_i \exp\left((1-\alpha_i)\frac{F}{RT} [E_r(t) - E^0_i]
\right).  \label{eq:rate2}
\end{align}

- This can be modelled by an ordinary differential equation containing 17 parameters to be estimated

$$
\mathbf{p} =
(E^0_1,E^0_2,E^0_3,E^0_4,E^0_5,E^0_6,k^0_1,k^0_2,k^0_3,k^0_4,k^0_5,k^0_6,
         \alpha_1,
         \alpha_2,
         R_u,
         C_{dl},
         \Gamma).
$$

- The effect of the $E^0_i$ parameters on the simulated current is highly non-linear.
- In such a high-dimensional space, all non-linear optimisers we tried failed to find the global minimum
- But approximate values of $E^0_i$ can be easily read off the experimental current trace.... **solution:** put a Gaussian prior on all $E^0_i$ parameters

The **standard deviation** of the Gaussian prior (i.e. confidence of the estimation of $E^0_i$) is required to be $\leq 0.1$ V for **reliable parameter estimation**

![](fig/quasireversible.svg)

# Revisiting the independent noise assumption

![](fig/danger.png)

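- Assuming independent, Gaussian measurement noise results in the following log-likelihood:

$$\log \mathcal{L}(\theta, \sigma | data) = -\frac{N}{2} \log(2\pi) - N \log(\sigma) - \frac{1}{2\sigma^2} \sum_{i=1}^{N} \left(y_i - f(t_i; \theta)\right)^2$$

where $y_i$ is the measurement at time $t_i$, $f(t_i; \theta)$ is the model prediction and $\sigma$ is the standard deviation of the noise.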

- Independent noise is easy to use, and often makes intuitive sense.
- **Check that this assumption is valid.**