In [None]:
# %load init.py
# Shared preamble for the notebook: imports, plotting defaults and sympy printing setup.
import os
import pickle
import sys
# Enable module import from the parent directory from notebooks
sys.path.append(os.path.abspath('..'))
import time

import matplotlib as mpl
# Select plotting backend
mpl.use('nbAgg')

import matplotlib.pyplot as plt
# Customize plotting
# NOTE(review): newer matplotlib releases renamed the bundled seaborn styles
# (e.g. 'seaborn-v0_8-paper'); confirm 'seaborn-paper' exists in the installed version.
plt.style.use('seaborn-paper')
# Font sizes, error-bar caps, figure resolution and line width used by every figure below
plt.rcParams['axes.labelsize'] = 11.0
plt.rcParams['axes.titlesize'] = 12.0
plt.rcParams['errorbar.capsize'] = 3.0
plt.rcParams['figure.dpi'] = 72.0
plt.rcParams['figure.titlesize'] = 12.0
plt.rcParams['legend.fontsize'] = 10.
plt.rcParams['lines.linewidth'] = 1.
plt.rcParams['xtick.labelsize'] = 11.0
plt.rcParams['ytick.labelsize'] = 11.0

import numpy as np
import sympy as sp
# Turn on sympy pretty printing with LaTeX output
sp.init_printing(euler=True, use_latex=True)

from IPython import display
from scipy import io, optimize
from sklearn import metrics

# Project-local modules (found through the sys.path entry added above)
import core
import dynamicals
import kernels
import numericals
import utils

In [None]:
# Lorenz 96 system used by the example below
# (presumably 500 is the number of states exposed as dynamical.num_x — confirm in dynamicals.Lorenz96)
dynamical = dynamicals.Lorenz96(500) 

# Example
Run the inference algorithm

In [None]:
# Number of states of the dynamical system, used to size every per-state setting
num_states = dynamical.num_x

config = core.Config()
config.create_time(0, 4, 80, 0, 4, 80, 0, 4, 80)

# Random initial state and the drift / diffusion parameters
config.X_0 = np.random.random(num_states) * 8.
config.theta = np.array([8.])
config.rho_2 = np.full(num_states, 4.)

# (Kernel name, Kernel parameters); the same pair is shared by every state
rbf_kernel = ('rbf', np.array([4.2, 0.1]))
config.phi = [rbf_kernel] * num_states
config.sigma_2 = np.full(num_states, 1.)

# Start with every state flagged in delta, then clear the flag for a random 35% of them
config.delta = np.full(num_states, True)
cleared_states = np.random.permutation(num_states)[:int(0.35 * num_states)]
config.delta[cleared_states] = False

# gamma is 5e-2 by default and 1e-1 for the states still flagged in delta
config.gamma = np.full(num_states, 5e-2)
config.gamma[config.delta] = 1e-1

# Optimizer settings
config.opt_method = 'Newton-CG'
config.opt_tol = 1e-6
config.max_init_iter = 10
config.max_iter = 1000

# Plot settings
config.plotting_enabled = True
config.plotting_freq = 50

# Simulate a sample path and collect observations from it
config.spl_X = dynamical.generate_sample_path(
    config.theta, config.rho_2, config.X_0, config.spl_tps)
config.obs_Y = utils.collect_observations(
    config.spl_X, config.obs_t_indices, config.sigma_2)

# Run the Gaussian process regression, then the Laplace mean-field step that takes it as input
gp = core.GaussianProcessRegression(dynamical, config)
gp.run()

lpmf = core.LaplaceMeanFieldSDE(dynamical, config, gp)
lpmf.run()

# Experiment
Setup for the experiment

In [None]:
# Directory template of the stored experiment; '{}' is filled in with the repetition number
directory = '../data/sde-lorenz-96-vgpamf/{}/'
# File names of the saved configuration and of the per-run data, as defined in utils
config_filename = utils.CONFIG_FILENAME
data_filename = utils.DATA_FILENAME

# Location of the VGPA-MF result files. The original machine-specific path is
# kept as the default; set the VGPAMF_DIRECTORY environment variable to read
# the results from another location without editing the notebook.
vgpamf_directory = os.environ.get(
    'VGPAMF_DIRECTORY',
    '/Users/ruifengxu/Development/ruiixu23/VGPA_MF/Results/'
)
# File name templates: '{}' is the repetition number, '{:.1f}' the grid value of theta
vgpamf_filename = '{}-VGPA.mat'
vgpamf_config_filename = '{}-config.mat'
vgpamf_grid_filename = '{}-VGPA-grid-{:.1f}.mat'

# Number of experiment repetitions and number of stored runs per repetition
num_repetitions = 10
num_rodes = 100

Plot the ODE trajectories

In [None]:
# Ten-state Lorenz 96 system; rho_2 is set to None for these ODE trajectories
dynamical = dynamicals.Lorenz96(10)
config = core.Config()
config.create_time(0, 15, 100, 0, 15, 10, 0, 15, 10)
config.X_0 = np.random.random(dynamical.num_x)
config.rho_2 = None

# One panel per value of the parameter, each with its own title
thetas = [np.array([value]) for value in (0.5, 2., 8.)]
titles = ['F = 0.5', 'F = 2', 'F = 8']

figure = plt.figure(figsize=plt.figaspect(3 / 9))
for i, (theta, title) in enumerate(zip(thetas, titles)):
    config.theta = theta
    config.spl_X = dynamical.generate_sample_path(
        config.theta, config.rho_2, config.X_0, config.spl_tps)

    ax = figure.add_subplot(1, 3, i + 1)
    # Only the first three states are drawn
    for state, label in enumerate(('State $1$', 'State $2$', 'State $3$')):
        ax.plot(config.spl_tps, config.spl_X[state], label=label, linewidth=1.5)
    ax.set_xlabel('Time', fontsize=13.)
    # The y-axis label is only put on the leftmost panel
    if i == 0:
        ax.set_ylabel('State', fontsize=13.)
    ax.set_title(title, fontsize=15.)
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles=handles, labels=labels, loc=0)
    ax.set_xlim([0, 15])

plt.tight_layout()
plt.show()
figure.savefig('lorenz-96-trajectories.eps', format='eps', dpi=1000, bbox_inches='tight')

- Print which states are observed
- Plot the state estimation for one SDE sample path
- Plot the parameter estimation for one SDE sample path

In [None]:
repetition = 6

# Load the saved experiment configuration of the selected repetition
config = core.Config()
config.load_config(directory.format(repetition), config_filename)

# Print which states are observed, starting a new output line whenever the
# index moves into a different block of ten
current_row = 0
for i in np.where(config.delta == True)[0]:
    row = i // 10
    if row != current_row:
        current_row = row
        print()
    print(i, end=' ')

# Load the stored result of every run and refuse non-positive theta estimates
data = []
for i in range(1, num_rodes + 1):
    run = utils.load_data(directory.format(repetition), data_filename.format(i))
    # np.all is used because np.alltrue was removed in NumPy 2.0
    if not np.all(run['eta_theta'] > 0):
        raise RuntimeError('Negative theta value encountered for rode {}'.format(i))
    data.append(run)

# Summaries of the state and parameter estimates over all loaded runs
X_mean = utils.get_X_mean(data)
X_var = utils.get_X_var(data)
theta_mean = utils.get_theta_mean(data)
theta_var = utils.get_theta_var(data)

# Load the VGPA-MF result of the same repetition and keep only the columns
# at the estimation time indices
with open(os.path.join(vgpamf_directory, vgpamf_filename.format(repetition)), 'rb') as infile:
    vgpamf_data = io.loadmat(infile)
vgpamf_X_mean = vgpamf_data['mt'][:, config.est_t_indices]
vgpamf_X_var = vgpamf_data['st'][:, config.est_t_indices]

# Plotting state estimation result: one row per selected state,
# left column LPMF-SDE and right column VGPA-MF
panels = (
    ('LPMF-SDE', X_mean, X_var),
    ('VGPA-MF', vgpamf_X_mean, vgpamf_X_var),
)
errorbar_style = {'ecolor': '0', 'elinewidth': 1., 'capsize': 3., 'capthick': .5}

figure = plt.figure(figsize=[10, 12])
for idx, i in enumerate([95, 357, 119, 212]):
    for column, (method, mean, var) in enumerate(panels):
        ax = figure.add_subplot(4, 2, idx * 2 + 1 + column)
        ax.plot(config.spl_tps, config.spl_X[i],
                color='C0', linestyle='-', linewidth=1.5, label='Sample path')
        # Observations are only drawn for states flagged in config.delta
        if config.delta[i]:
            ax.scatter(config.obs_tps, config.obs_Y[i],
                       color='C1', marker='x', label='Observation')
        ax.errorbar(config.est_tps, mean[i], yerr=np.sqrt(var[i]),
                    color='C2', linestyle='--', linewidth=1.5, label='Estimation',
                    **errorbar_style)
        ax.set_xlabel('Time')
        # Only the left column carries the state label
        if column == 0:
            ax.set_ylabel('State {}'.format(i + 1))
        ax.set_xlim([config.spl_t_0, config.spl_t_T])
        ax.set_ylim([-12, 16])
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles=handles, labels=labels, loc=0)
        # Only the top row carries the method name as title
        if idx == 0:
            ax.set_title(method)
figure.tight_layout()
plt.show()
figure.savefig('lorenz-96-states.eps', format='eps', dpi=1000, bbox_inches='tight')

# Plotting parameter estimation result: true value and LPMF-SDE estimate
# drawn as neighbouring bars, the estimate with a one-standard-deviation error bar
bar_width = 0.15
bar_indices = np.arange(config.theta.size)
error_style = {'ecolor': '0', 'elinewidth': 1., 'capsize': 3., 'capthick': .5}

figure = plt.figure(figsize=plt.figaspect(1))
ax = figure.gca()
ax.bar(bar_indices, config.theta, bar_width,
       color='C0', edgecolor='black', label='Truth')
ax.bar(bar_indices + bar_width, theta_mean, bar_width,
       yerr=np.sqrt(theta_var), error_kw=error_style,
       color='C2', edgecolor='black', label='LPMF-SDE')
ax.set_ylabel('Value')
ax.set_xlabel('Parameter')
ax.set_xlim([-0.35, 0.55])
ax.set_ylim([0, 10])
# Put each tick halfway between the two bars of a parameter
ax.set_xticks(bar_indices + bar_width / 2)
tick_labels = [r'${}$'.format(label) for label in dynamical.theta_labels]
ax.set_xticklabels(tick_labels)
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=labels, loc=0)
figure.tight_layout()
plt.show()
figure.savefig('lorenz-96-parameters.eps', format='eps', dpi=1000, bbox_inches='tight')

- Plot the RMSE of state estimation
- Plot the parameter estimation box plot
- Plot the runtime box plot

In [None]:
# Per-repetition accumulators. The error lists hold one mean squared error per
# state and are converted to RMSE arrays after the loop.
mse = []
vgpamf_mse = []

theta_mean = []

runtime_mean = []
vgpamf_runtime = []

for repetition in range(1, num_repetitions + 1):
    # Load the saved configuration of this repetition
    config = core.Config()
    config.load_config(directory.format(repetition), config_filename)

    # Load data from LPMF-SDE, refusing non-positive theta estimates
    data = []
    for i in range(1, num_rodes + 1):
        run = utils.load_data(directory.format(repetition), data_filename.format(i))
        # np.all is used because np.alltrue was removed in NumPy 2.0
        if not np.all(run['eta_theta'] > 0):
            raise RuntimeError('Negative theta value encountered for rode {}'.format(i))
        data.append(run)

    # Error of the estimated state mean against the sample path at the estimation times
    X_mean = utils.get_X_mean(data)
    mse.append([
        metrics.mean_squared_error(X_mean[i], config.spl_X[i, config.est_t_indices])
        for i in range(config.spl_X.shape[0])
    ])
    theta_mean.append(list(utils.get_theta_mean(data)))
    runtime_mean.append(utils.get_runtime_mean(data))

    # Load data from VGPA-MF and compute the same error for its state mean
    with open(os.path.join(vgpamf_directory, vgpamf_filename.format(repetition)), 'rb') as infile:
        vgpamf_data = io.loadmat(infile)
    vgpamf_X_mean = vgpamf_data['mt'][:, config.est_t_indices]
    vgpamf_mse.append([
        metrics.mean_squared_error(vgpamf_X_mean[i], config.spl_X[i, config.est_t_indices])
        for i in range(config.spl_X.shape[0])
    ])
    vgpamf_runtime.append(vgpamf_data['runtime'].ravel()[0])

# Transpose so that rows index states (or parameters) and columns index repetitions
rmse = np.sqrt(mse).T
theta_mean = np.array(theta_mean).T
runtime_mean = np.array(runtime_mean)

vgpamf_rmse = np.sqrt(vgpamf_mse).T
vgpamf_runtime = np.array(vgpamf_runtime)

# Artist styles shared by the box plots: box outline, median line and mean marker
boxprops = {'linestyle': '-', 'linewidth': 1., 'color': '0'}
medianprops = {'linestyle': '-', 'linewidth': 1.2, 'color': 'red'}
meanpointprops = {
    'marker': 'D',
    'markersize': 6.,
    'markeredgecolor': 'green',
    'markerfacecolor': 'green',
}

# Box plot for state estimation RMSE
figure = plt.figure(figsize=plt.figaspect(0.6))
ax = figure.gca()

# Average the per-state RMSE over the states selected by each mask, giving one
# value per repetition. Order: delta==True (LPMF-SDE, VGPA-MF), then delta==False.
flagged_states = config.delta == True
unflagged_states = config.delta == False
rmse_data = [
    np.mean(errors[mask, :], axis=0)
    for mask in (flagged_states, unflagged_states)
    for errors in (rmse, vgpamf_rmse)
]
# Tick labels of the four RMSE boxes. The second label previously carried an
# unbalanced '}' inside its math text; all four now use the same well-formed markup.
labels = [
    'LPMF-SDE\nRMSE$_{obs}$',
    'VGPA-MF\nRMSE$_{obs}$',
    'LPMF-SDE\nRMSE$_{unobs}$',
    'VGPA-MF\nRMSE$_{unobs}$'
]
# Draw the RMSE boxes with 5th/95th percentile whiskers, mean markers and no fliers
rmse_box_options = {
    'notch': False,
    'showfliers': False,
    'showmeans': True,
    'whis': [5, 95],
    'boxprops': boxprops,
    'medianprops': medianprops,
    'meanprops': meanpointprops,
}
ax.boxplot(rmse_data, labels=labels, **rmse_box_options)
ax.set_ylabel('RMSE')
ax.set_xlabel('Method')
figure.tight_layout()
plt.show()
figure.savefig(
    'lorenz-96-states-boxplot.eps', format='eps', dpi=1000, bbox_inches='tight')

# Box plot for parameter estimation
figure = plt.figure(figsize=plt.figaspect(1))
ax = plt.gca()
# theta_mean is a 2-D array with one row per parameter and one column per
# repetition. boxplot draws one box per COLUMN of a 2-D array, which would not
# match the single label below, so pass a list holding one 1-D sample per
# parameter instead.
theta_data = list(theta_mean)
labels = ['$F$']
ax.boxplot(theta_data, labels=labels, notch=False, showfliers=False, showmeans=True,
           boxprops=boxprops, medianprops=medianprops, meanprops=meanpointprops, whis=[5, 95])
# Horizontal dashed line at the true parameter value
ax.plot(np.arange(3), np.full(3, 8), linestyle='--', label='Truth')
ax.set_ylim([7.5, 8.2])
ax.set_xlabel('Parameter')
# ax.set_ylabel('Value') The y_label is shared with another plot
# Only the truth line gets a legend entry
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=['Truth'], loc=0)
figure.tight_layout()
plt.show()
figure.savefig('lorenz-96-parameters-boxplot.eps', format='eps', dpi=1000, bbox_inches='tight')

# Box plot for runtime: one box per method, one sample per repetition
figure = plt.figure(figsize=plt.figaspect(1))
ax = figure.gca()

# Rows are methods; the transpose passed to boxplot puts each method in a column
runtime_data = np.array([runtime_mean, vgpamf_runtime])
labels = ['LPMF-SDE\n', 'VGPA-MF\n']
runtime_box_options = {
    'notch': False,
    'showfliers': False,
    'showmeans': True,
    'whis': [5, 95],
    'boxprops': boxprops,
    'medianprops': medianprops,
    'meanprops': meanpointprops,
}
ax.boxplot(runtime_data.T, labels=labels, **runtime_box_options)
ax.set_xlabel('Method')
ax.set_ylabel('Runtime (s)')
ax.set_ylim([2000, 3400])
figure.tight_layout()
plt.show()
figure.savefig(
    'lorenz-96-runtime-boxplot.eps', format='eps', dpi=1000, bbox_inches='tight')

Plot grid search result from VGPA-MF

In [None]:
# Collect one cost value per grid point of theta from the VGPA-MF grid-search files
vgpamf_theta_costs = []
repetition = 6
thetas = np.linspace(1, 15, 29)
for theta in thetas:
    grid_path = os.path.join(
        vgpamf_directory, vgpamf_grid_filename.format(repetition, theta))
    with open(grid_path, 'rb') as infile:
        data = io.loadmat(infile)
    stats = data['mfStat'][0]
    # The first field holds a one-based position into the second field
    index = stats[0].ravel()[0] - 1
    vgpamf_theta_costs.append(stats[1].ravel()[index])

# Scatter the collected values against theta and mark the true value
figure = plt.figure(figsize=plt.figaspect(1))
ax = figure.gca()
ax.axvline(x=8.0, linestyle='--', color='C0', label='Truth')
ax.scatter(thetas, vgpamf_theta_costs, color='C2', marker='.', label='VGPA-MF')
ax.set_xlabel('$F$')
ax.set_ylabel('Variational free energy')
ax.set_ylim([22000, 32000])
ax.set_xlim([0, 16])
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=labels, loc=0)
figure.tight_layout()
plt.show()
figure.savefig(
    'lorenz-96-parameters-grid-search.eps', format='eps', dpi=1000, bbox_inches='tight')

In [None]:
# Helper to transform the VGPA sample paths into our settings: for every
# repetition, read the MATLAB 'config.mat' file and save an equivalent Config.
for repetition in range(1, num_repetitions + 1):
    with open(os.path.join(directory.format(repetition), 'config.mat'), 'rb') as infile:
        config_mat = io.loadmat(infile)

    config = core.Config()
    config.create_time(0, 4, 100, 0, 4, 1, 0, 4, 1)

    # Observation times come from the MATLAB file instead of the generated grid.
    # The builtin int/float are used as dtypes because the np.int/np.float
    # aliases were removed in NumPy 1.24.
    config.obs_freq = 8
    config.obs_t_indices = np.array(config_mat['obsX'].ravel(), dtype=int)
    config.obs_tps = config.spl_tps[config.obs_t_indices].copy()

    # Estimation happens at exactly the observation times
    config.est_freq = config.obs_freq
    config.est_t_indices = config.obs_t_indices.copy()
    config.est_tps = config.spl_tps[config.est_t_indices]

    # Initial state is the first column of the stored sample path
    config.X_0 = np.array(list(config_mat['Xt'][:, 0]), dtype=float)
    num_x = config.X_0.shape[0]
    config.theta = np.array([8.])
    config.rho_2 = np.full(num_x, 4.)

    config.phi = [
        # (Kernel name, Kernel parameters)
        ('rbf', np.array([4.2, 0.1]))
    ] * num_x
    config.sigma_2 = np.full(num_x, 1.)
    # 'dMask' holds one-based state indices, hence the shift by one
    config.delta = np.full(num_x, False)
    config.delta[config_mat['dMask'] - 1] = True
    config.gamma = np.full(num_x, 5e-2)
    config.gamma[config.delta] = 1e-1

    config.opt_method = 'Newton-CG'
    config.opt_tol = 1e-6
    config.max_init_iter = 10
    config.max_iter = 1000

    config.plotting_enabled = False
    config.plotting_freq = 50

    # Sample path and observations are copied over; rows of obs_Y for states
    # not flagged in delta stay zero
    config.spl_X = np.array(list(config_mat['Xt']))
    config.obs_Y = np.zeros((num_x, config.obs_tps.size))
    config.obs_Y[config.delta, :] = np.array(list(config_mat['obsY']))

    config.save_config(directory.format(repetition), config_filename)

In [None]:
# Helper to check that the results are matching: the converted config, the
# local 'config.mat' and the original VGPA-MF config must hold the same sample
# path, and different repetitions must hold different sample paths.
for repetition in range(1, num_repetitions + 1):
    with open(os.path.join(directory.format(repetition), 'config.mat'), 'rb') as infile:
        config_mat = io.loadmat(infile)

    config_i = utils.load_data(directory.format(repetition), config_filename)

    # np.all is used because np.alltrue was removed in NumPy 2.0
    assert np.all(config_mat['Xt'] == config_i['spl_X'])

    # Fill in the file name template before joining, so that the directory
    # part of the path is never interpreted as a format string
    original_path = os.path.join(vgpamf_directory, vgpamf_config_filename.format(repetition))
    with open(original_path, 'rb') as infile:
        config_mat_original = io.loadmat(infile)

    assert np.all(config_mat['Xt'] == config_mat_original['Xt'])

    # Compare against this and every later repetition
    for j in range(repetition, num_repetitions + 1):
        config_j = utils.load_data(directory.format(j), config_filename)
        if repetition == j:
            assert np.all(config_i['spl_X'] == config_j['spl_X'])
        else:
            assert np.any(config_i['spl_X'] != config_j['spl_X'])