# Working with Time-Series Data in a Consistent Bayesian Framework
---

Copyright 2017 Michael Pilosov

Demonstration available at https://www.youtube.com/watch?v=rUIVcl64NXw

### Import Libraries
_(should be 2.7 and 3.x compatible)_

In [None]:
# Mathematics and Plotting
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as sstats
from scipy.stats import gaussian_kde as gkde
%matplotlib inline
# Notebook-wide plotting defaults: 14pt font and square 5x5 figures.
plt.rcParams.update({'font.size': 14, 'figure.figsize': (5, 5)})

# Interactivity
from ipywidgets import *

---
## Defining the Parameter to Observables (PtO) Map 

---
Consider the Ordinary Differential Equation Initial Value Problem given by  

$$
\partial_t u(t) = -u(t) \\
u(0) = \lambda_0
$$

The solution to this problem is $u(t) = \lambda_0 \,e^{-t}$.

Suppose $\lambda_0$ is some uncertain input parameter that we are trying to estimate through experimental observations. 

Suppose we know that $\lambda_0 \in [0, 2]$ with uniform probability and that we hope to infer the parameter by observing the system $u$ at $K+1$ evenly spaced times in the interval $[T_0, T]$, where $T > T_0 > 0$. This corresponds to having $K$ degrees of freedom.

In order to define our Parameter-to-Observables Map, we want to transform these observations into a single measurement, which we do by looking at the **Mean Squared Error (2-norm) between the observations and the model predictions**:  

$$
O(\lambda) = \frac{1}{K+1} \sum_{k=0}^{K} \frac{(\lambda e^{-(T_0+k\Delta_t)} - \lambda_0e^{-(T_0+k\Delta_t)}) ^ 2}{\sigma_k^2} = \frac{1}{K+1}  \sum_{k=0}^{K} \frac{( (\lambda- \lambda_0) e^{-(T_0+k\Delta_t)} ) ^ 2 }{ \sigma_k^2}
$$
$$
= \frac{(\lambda- \lambda_0)^2}{K+1}  \sum_{k=0}^{K}  \left ( \frac{e^{-(T_0+k\Delta_t)}}{\sigma_k} \right )^2, \quad \text{where } \Delta_t\equiv \frac{T - T_0}{K} \text{ for } K \geq 1
$$


Observe that the division of each measurement by a standard deviation is consistent with the formulation of an additive error statistical model familiar to the usual Bayesian formulation:  

$$
y_i = \beta\, x_i + \epsilon_i, \quad \epsilon_i \sim N(0,\sigma_i)
$$

For a step-by-step walkthrough, please see the CBayes_TS.ipynb file.
Below you will find an all-in-one version. 


---

### Define some functions for the sandbox

In [None]:
def sandbox(num_samples=int(1E4), lam_bound=(0, 2), lam0=0.5, dof=1,
            T=(0.1, 1), uncertainty=0.05, sd=1):
    """Solve and plot the one-parameter inverse problem for u(t) = lam*exp(-t).

    Uniform samples of the parameter are drawn on ``lam_bound`` and mapped
    through the scaled squared-error observable

        O(lam) = (lam - lam0)**2 / (dof + 1) * sum_k (exp(-t_k) / sd)**2

    where ``t_k`` are ``dof + 1`` evenly spaced times spanning ``T``.  The
    pushforward density of the samples is estimated with a Gaussian KDE, the
    observed density is uniform on ``[0, uncertainty]``, and the ratio
    observed / pushforward (normalized by its maximum) is plotted against
    the samples.  The entropy of the normalized ratio is printed.

    Parameters
    ----------
    num_samples : int
        Number of parameter samples to draw.
    lam_bound : pair of float
        Lower and upper bound of the uniform sampling interval.
    lam0 : float
        Reference parameter the observable is measured against.
    dof : int
        ``K``; the number of observation times is ``dof + 1``.
    T : pair of float
        Start and end observation time (swapped if given in reverse order).
    uncertainty : float
        Width of the uniform observed density, which starts at 0.
    sd : float
        Constant standard deviation applied to every observation.

    Returns
    -------
    None
        Results are printed and plotted only.
    """
    # NOTE this version only uses constant variances for the sake
    # of interactivity.
    num_samples = int(num_samples)
    num_observations = int(dof) + 1
    sigma = sd*np.ones(num_observations)

    T_start, T_end = T
    if T_end < T_start:
        print('Error: end time is before start time. Switching them now.')
        T_start, T_end = T_end, T_start
    if num_observations == 1:
        print('K=0 specified, This is a single observation at t = %f.'%T_start)
    t = np.linspace(T_start, T_end, num_observations)

    # The sum over observation times does not depend on lambda, so it is
    # computed once here instead of on every call of the map.
    weight_sum = np.sum((np.exp(-t)/sigma)**2)

    def PtO_fun(lam):
        return ((lam - lam0)**2/num_observations)*weight_sum

    # Sample the Parameter Space
    a, b = lam_bound
    lam = np.random.uniform(a, b, size=(1, num_samples))
    # Map to Data Space
    D = PtO_fun(lam)

    # A KDE cannot be built from fewer than two samples or from samples that
    # all coincide (e.g. a == b); report it instead of raising.
    if num_samples < 2 or D.max() == D.min():
        print('Error: the samples of O(lambda) are degenerate; '
              'increase the number of samples or widen the parameter bound.')
        return

    # Perform KDE to estimate the pushforward
    pf_dens = gkde(D)
    # Specify Observed Measure - Uniform Density on [0, uncertainty] (1D only)
    obs_dens = sstats.uniform(0, uncertainty)
    # Solve the problem: ratio of observed to pushforward density evaluated
    # at every O(lambda).  D has shape (1, N), so r does as well.
    r = obs_dens.pdf(D) / pf_dens.evaluate(D)
    M = np.max(r)
    if M > 0:
        eta_r = r[0]/M
        print('\tEntropy is %1.4e'%sstats.entropy(eta_r))
    else:
        # No sample landed inside the support of the observed density, so
        # normalizing by M would produce 0/0.  Show an empty posterior.
        eta_r = np.zeros_like(r[0])
        print('Error: no samples fall inside the observed density support '
              '[0, %g]; the posterior is empty.' % uncertainty)

    res = 50
    max_x = 3
    # Plot stuff
    # NOTE: this changes the default figure size for later figures as well.
    plt.rcParams['figure.figsize'] = (18, 6)
    plt.figure()

    plt.subplot(1, 3, 1)
    x = np.linspace(-0.25, max_x, res)
    plt.plot(x, pf_dens.evaluate(x))
    plt.title('Pushforward of Prior')
    plt.xlabel('O(lambda)')

    plt.subplot(1, 3, 2)
    xx = np.linspace(0, 0.5, res)
    plt.plot(xx, obs_dens.pdf(xx))
    plt.title('Observed Density')
    plt.xlabel('O(lambda)')

    plt.subplot(1, 3, 3)
    plt.scatter(lam, eta_r)
    # Mark the reference parameter lam0 just above the horizontal axis.
    plt.scatter(lam0, 0.05)
    plt.title('Posterior Distribution')
    plt.xlabel('Lambda')
    plt.xlim([a, b])
    plt.show()
    
#     return eta_r

---

# All-in-One Sandbox!
_Run the cells below to start experimenting_
The widgets below carry a leading underscore because the un-prefixed names are used later for lists of widgets, and a list does not offer autocomplete for the widget's attributes. So I use the single widget here, under the same name plus an underscore, to figure out the syntax, then add in the `[k]` index afterwards, e.g. `num_samples[0].value`.

In [None]:
# Number of parameter samples N passed to sandbox().
_num_samples = widgets.IntSlider(value=1000, continuous_update=False, orientation='vertical',
    min=int(5E2), max=int(5E4), step=500, description='$N$ :')

# Sampling interval for the parameter.  The descriptions are raw strings so
# the LaTeX backslashes are not parsed as (invalid) Python escape sequences.
_lam_bound = widgets.FloatRangeSlider(value=[0.0, 2.0], continuous_update=False, orientation='horizontal',
    min=-5.0, max = 5.0, step=0.25, description=r'Param: $\Lambda \in$')

# Reference parameter lambda_0 passed to sandbox() as lam0.
_lam0 = widgets.FloatSlider(value=1.0, continuous_update=False, orientation='horizontal',
    min=0.25, max=1.75, step=0.05, description=r'IC: $\lambda_0$')

def update_lam0_range(*args):
    """Keep the lambda_0 slider's range equal to the selected parameter bound.

    Registered as an observer on ``_lam_bound``; the arguments passed by the
    observer machinery are ignored.
    """
    lower, upper = _lam_bound.value
    # Bounded sliders reject a min above their current max (and a max below
    # their current min), so apply the two limits in whichever order keeps
    # the intermediate state valid.
    if lower > _lam0.max:
        _lam0.max = upper
        _lam0.min = lower
    else:
        _lam0.min = lower
        _lam0.max = upper
_lam_bound.observe(update_lam0_range, 'value')

# Degrees of freedom K; sandbox() uses K + 1 observation times.
_dof = widgets.IntSlider(value=0, continuous_update=False, orientation='horizontal',
    min=0, max=50, description='d.o.f: $K$ =')

# Observation window [T_0, T].  Descriptions containing LaTeX are raw strings
# so the backslashes are not parsed as (invalid) Python escape sequences.
_T = widgets.FloatRangeSlider( value=[0.5, 1], min=0.1, max=7.5, step=0.1, continuous_update=False,
    description=r'$t\in [T_0, T]$ :', orientation='horizontal',
    readout=True, readout_format='.1f')

# Width of the uniform observed density.
_uncertainty = widgets.FloatSlider(value=0.01, continuous_update=False, orientation='vertical',
    min=0.005, max=0.25, step=0.005, 
    description=r'$\epsilon$ :', readout_format='.3f')

# Constant standard deviation of the observations.
_sd = widgets.FloatSlider(value=1, continuous_update=False, orientation='vertical',
    min=0.15, max=1.85, step=0.05, description=r'$\sigma$ :')

# Layout: horizontal sliders stacked on the left, vertical sliders on the right.
_lbl = widgets.Label("UQ Sandbox", disabled=False)
_u1 = widgets.VBox([_lbl, _lam_bound, _lam0, _dof, _T])
_u2 = widgets.HBox([_num_samples, _uncertainty, _sd])
_ui = widgets.HBox([_u1, _u2])
_u1.layout.justify_content = 'center'
_ui.layout.justify_content = 'center'


# Each dictionary key is the name of the sandbox() keyword argument that the
# corresponding widget controls.
_out = interactive_output(sandbox, {'num_samples': _num_samples, 
                        'lam_bound': _lam_bound, 
                        'lam0': _lam0, 
                        'dof': _dof, 
                        'T': _T,
                        'uncertainty': _uncertainty, 
                         'sd': _sd} )
display(_ui, _out)

---

### Suggestions

- Increase $N$ and watch the Pushforward of the Prior change/converge.
- If you broaden the standard deviation $\sigma$, we suggest also broadening the bound on the parameter space $\Lambda$ in order to avoid violating the predictability assumption.
- Notice the relationship between the bound on the interval we are inverting for the Mean Squared Error and the support of the posterior.
- The same happens as you increase $\sigma$.
- Change the initial condition $\lambda_0$ and watch the posterior distribution follow the slider.



- Fix the number of observations to 1 and change the interval over which the observation is being made (with $K=0$, the observation occurs only at $T_0$). Notice the diminishing returns as you wait to make your measurement. 
- Fix some interval and change the number of observations made during this time period.
- Fix a number of observations (several) and fix $T_0$ while changing $T$ to observe another example of diminishing returns.

### Observations

- Entropy barely changes as $\lambda_0$ moves around. Increases a bit near boundary of $\Lambda$ (likely due to predictability assumption being violated)
- For a wide time measurement window, entropy increases with the number of observations $K$ (d.o.f.)
- Widening $\Lambda$ decreases entropy, obviously enlarges $\mathcal{D}$, support of $P_\mathcal{D}$.
- If you narrow the window, the entropy decreases.
- As the window slides earlier in time, the entropy decreases.
- Higher MSE threshold ($\epsilon$, support of observed density) means higher entropy.
- Higher variance means higher entropy. We might run a suite of $\sigma$s MADS-style to study the robustness of a design. 
    - perhaps if we try to minimize entropy (maximize information gain), we look for designs that are less sensitive to the choice of $\sigma$s, which would **correspond to an experimental design that is robust to measurement uncertainty.**
- Increasing the number of samples $N$ increases entropy quite a bit. Would like to figure out a way to control for this? _Is it even right to be using `scipy.stats.entropy`?_

In [None]:
# Show the same control panel and output area again in this cell.
display(_ui, _out)

---
#### Work in progress below

---

## Experiments


In [None]:
num_experiments = 5

# We create many copies of the same widget objects in order to isolate our experimental areas.
# Each list below holds one widget per experiment, indexed by k.
# Descriptions containing LaTeX are raw strings so the backslashes are not
# parsed as (invalid) Python escape sequences.
num_samples = [widgets.IntSlider(value=1000, continuous_update=False, orientation='vertical',
    min=int(5E2), max=int(1E4), step=500, description='$N$ :') for k in range(num_experiments)]
lam_bound = [widgets.FloatRangeSlider(value=[0.0, 2.0], continuous_update=False, orientation='horizontal',
    min=-5.0, max = 5.0, step=0.25, description=r'Param: $\Lambda \in$') for k in range(num_experiments)]
lam0 = [widgets.FloatSlider(value=1.0, continuous_update=False, orientation='horizontal',
    min=0.25, max=1.75, step=0.05, description=r'IC: $\lambda_0$') for k in range(num_experiments)]
dof = [widgets.IntSlider(value=0, continuous_update=False, orientation='horizontal',
    min=0, max=50, description='d.o.f: $K$ =') for k in range(num_experiments)]
T = [widgets.FloatRangeSlider( value=[0.5, 1], min=0.1, max=7.5, step=0.1, continuous_update=False,
    description=r'$t\in [T_0, T]$ :', orientation='horizontal',
    readout=True, readout_format='.1f') for k in range(num_experiments)]
uncertainty = [widgets.FloatSlider(value=0.01, continuous_update=False, orientation='vertical',
    min=0.005, max=0.25, step=0.005,
    description=r'$\epsilon$ :', readout_format='.3f') for k in range(num_experiments)]
sd = [widgets.FloatSlider(value=1, continuous_update=False, orientation='vertical',
    min=0.15, max=1.85, step=0.05, description=r'$\sigma$ :') for k in range(num_experiments)]

# One control dictionary per experiment; each key is the name of the
# sandbox() keyword argument that the widget drives.
D = [ {'num_samples': num_samples[k], 
         'lam_bound': lam_bound[k], 
         'lam0': lam0[k], 
         'dof': dof[k], 
         'T': T[k],
         'uncertainty': uncertainty[k], 
         'sd': sd[k]} for k in range(num_experiments)] 


# One output area per experiment, each re-running sandbox() from its own controls.
out = [interactive_output(sandbox, D[k]) for k in range(num_experiments)]
                          
def update_lam0(*args):
    """Keep an experiment's lambda_0 slider range equal to its parameter bound.

    Registered as an observer on every entry of ``lam_bound``.  The experiment
    is identified from the widget that fired the change (``change['owner']``),
    so the right slider is updated even when the change does not come from
    the tab currently on screen.  When called without a change dictionary,
    the experiment of the currently selected tab is used instead.
    """
    k = None
    change = args[0] if args else None
    if isinstance(change, dict):
        owner = change.get('owner')
        k = next((i for i, w in enumerate(lam_bound) if w is owner), None)
    if k is None:
        k = tab_nest.selected_index
    if k is None:
        # No identifiable experiment and no selected tab: nothing to update.
        return

    lower, upper = lam_bound[k].value
    slider = lam0[k]
    # Bounded sliders reject a min above their current max (and a max below
    # their current min), so apply the two limits in whichever order keeps
    # the intermediate state valid.
    if lower > slider.max:
        slider.max = upper
        slider.min = lower
    else:
        slider.min = lower
        slider.max = upper

# Bind Updates for linked sliders
for k in range(num_experiments):
    lam_bound[k].observe(update_lam0, 'value')


# User Interface
lbl = widgets.Label("UQ Sandbox", disabled=False)

# Per-experiment control panels: horizontal sliders stacked on the left (u1),
# vertical sliders side by side on the right (u2), combined into ui.
u1, u2, ui = [], [], []
for k in range(num_experiments):
    _left = widgets.VBox([lbl, lam_bound[k], lam0[k], dof[k], T[k]])
    _left.layout.justify_content = 'center'
    _right = widgets.HBox([num_samples[k], uncertainty[k], sd[k]])
    u1.append(_left)
    u2.append(_right)
    ui.append(widgets.HBox([_left, _right]))

# Create our pages
pages = [widgets.HBox() for k in range(num_experiments)]

# instantiate notebook with tabs (accordions) representing experiments
tab_nest = widgets.Tab()
tab_nest.children = list(pages)

# title your notebooks
experiment_names = ['Experiment %d'%k for k in range(num_experiments)]
for k, _title in enumerate(experiment_names):
    tab_nest.set_title(k, _title)

# Spawn the children: each page holds its controls above its output area
for k in range(num_experiments):
    pages[k].children = [widgets.VBox([ui[k], out[k]])]

# Display the "tabulated nest"
tab_nest

In [None]:
# With all widgets created, individual experiments can be tweaked from code.
# Commands like the ones below can be put into accordions or new tabs as checkboxes.
first_dof = dof[0]

# this is how you can disable individual sliders.
first_dof.disabled = True

# this is how you can manually write a value
first_dof.set_trait('value', 5)

In [None]:
# We can grab individual elements and control them here instead;
# the tabs above will react.  This shows the first experiment's page.
tab_nest.children[0]

In [None]:
# Inspect the Tab widget's selected_index (presumably the index of the tab currently on screen).
tab_nest.selected_index

In [None]:
# Current value of the second experiment's parameter-bound range slider.
lam_bound[1].value

In [None]:
# Inspect the widget's `keys` attribute (presumably its trait names; confirm in the ipywidgets docs).
lam_bound[1].keys