In [3]:
import time
import openturns as ot
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from shapley import SobolIndices, SobolKrigingIndices, create_df_from_gp_indices, create_df_from_mc_indices
from shapley.tests import Ishigami
from shapley.plots import set_style_paper, plot_violin

# Load the IPython autoreload extension and enable mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Plot styling helper imported from shapley.plots.
# NOTE(review): presumably configures seaborn/matplotlib defaults for
# paper-quality figures -- confirm against shapley.plots.
set_style_paper()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Test function used throughout the notebook. `model` is an alias of the
# same object; the remaining names cache attributes that later cells read.
ishigami = Ishigami()
model = ishigami
input_distribution = model.input_distribution
dim = model.ndim
ishigami_true_indices = model.first_order_sobol_indices

In [None]:
# --- Experiment configuration ------------------------------------------------
# max_budgets   : values passed as `n_sample` when building the kriging
#                 meta-model (and, divided by 2*dim, to the plain MC estimate).
# n_samples     : values passed as the sample size to
#                 `sobol_kriging.build_mc_sample` on the meta-model.
# n_boot        : forwarded as `n_boot` to `compute_indices`.
# n_realization : third argument of `sobol_kriging.build_mc_sample`
#                 (presumably the number of GP realizations -- confirm).
max_budgets = [50]
n_samples = [50, 500, 1000, 2000, 5000]
n_boot = 500
n_realization = 300
basis_type = 'linear'
kernels = ['matern']

n_sample_cv = 1000          # sample size used for the Q2 cross-validation score
ylim = [-.3, 1.]            # shared y-range of the violin plots
with_MC_estimate = False    # also compute a plain Monte-Carlo estimate on the true model
mean_method = True          # forwarded to create_df_from_gp_indices
savefig = False             # write each figure to output/ as .pdf and .png

sobol = SobolIndices(input_distribution)
sobol_kriging = SobolKrigingIndices(input_distribution)
same_bootstrap = True

# One array of GP-based first-order indices per (max_budget, kernel, n_sample)
# combination, in loop order; consumed by the density-plot cell below.
all_results = []

for max_budget in max_budgets:
    for kernel in kernels:
        if with_MC_estimate:
            # The MC estimate does not depend on n_sample, so it is computed
            # once per (max_budget, kernel) pair. The original cell recomputed
            # the DataFrame inside the inner loop through an undefined name
            # (`create_df_from_indices`), which raised NameError as soon as
            # with_MC_estimate was enabled; that redundant call is removed.
            sobol.build_mc_sample(model, int(max_budget/(2*dim)))
            first_indices_mc = sobol.compute_indices(n_boot=n_boot)
            df_first_indices_mc = create_df_from_mc_indices(first_indices_mc)

        meta_model = sobol_kriging.build_meta_model(model, n_sample=max_budget, basis_type=basis_type, kernel=kernel)

        score_q2_cv = meta_model.compute_score_q2_cv(n_sample_cv, sampling='lhs')
        print('Kriging with {0} Cross Validation Q2 score with {1} observations: {2}'.format(kernel, n_sample_cv, score_q2_cv))
        for n_sample in n_samples:
            t_start = time.time()
            print('n_sample_gp: %d, max_budget: %d' % (n_sample, max_budget))

            sobol_kriging.build_mc_sample(meta_model, n_sample, n_realization)
            first_indices_gp = first_indices_gp = sobol_kriging.compute_indices(n_boot=n_boot, same_bootstrap=same_bootstrap)

            # first_indices_gp is indexed as a 3-D array whose first axis has
            # length `dim`.
            # NOTE(review): axes 1 and 2 are presumably (n_realization, n_boot)
            # -- confirm against SobolKrigingIndices.compute_indices.
            mean_first_indices_gp = first_indices_gp.reshape(dim, -1).mean(axis=1)
            error = abs(mean_first_indices_gp - ishigami_true_indices)
            print('Absolute error:', error)

            # Variance of all estimates pooled, versus the variance along one
            # axis averaged over the other one.
            var_tot = first_indices_gp.reshape(dim, -1).var(axis=1)
            var_MC = first_indices_gp.var(axis=2).mean(axis=1)
            var_gp = first_indices_gp.var(axis=1).mean(axis=1)
            print('MC var:', var_MC)
            print('Kriging var:', var_gp)
            print('Sum of vars:', var_gp + var_MC)
            print('Total var:', var_tot)
            print('Difference ratio vars:', abs(var_tot - (var_gp + var_MC))/var_tot)

            df_first_indices_gp = create_df_from_gp_indices(first_indices_gp, mean_method=mean_method)

            fig, axes = plt.subplots(2, 1, sharex=True, sharey=True, figsize=(8, 7))

            # Top panel: one violin per input variable.
            ax = axes[0]
            sns.violinplot(x='Variables', y='Indice values', data=df_first_indices_gp, ax=ax)
            ax.set_xlabel('')
            ax.plot(ishigami_true_indices, 'yo', markersize=11, label='True indices')
            ax.set_title('Sobol GP Estimate: %s, $n=%d$, $m=%d$, Q2=%.2f' % (kernel, max_budget, n_sample, score_q2_cv))
            ax.legend(loc=0)
            ax.set_ylim(ylim)

            # Bottom panel: same data, each violin split by the 'Error' column.
            ax = axes[1]
            sns.violinplot(x='Variables', y='Indice values', data=df_first_indices_gp, hue='Error', ax=ax, split=True)
            ax.set_xlabel('')
            ax.plot(ishigami_true_indices, 'yo', markersize=11, label='True indices')
            ax.legend(loc=0)
            ax.set_ylim(ylim)

            fig.tight_layout()
            if savefig:
                fig.savefig('output/ishigami_sobolGP_n_%d_Nz_%d_m_%d_B_%d.pdf' % (max_budget, n_realization, n_sample, n_boot))
                fig.savefig('output/ishigami_sobolGP_n_%d_Nz_%d_m_%d_B_%d.png' % (max_budget, n_realization, n_sample, n_boot))

            print('Elapsed time: %.2f s\n' % (time.time() - t_start))

            all_results.append(first_indices_gp)

Kriging with matern Cross Validation Q2 score with 1000 observations: 0.39809431369006565
n_sample_gp: 50, max_budget: 50
Absolute error: [ 0.11476248  0.41013945  0.0457777 ]
MC var: [ 0.01722428  0.01688998  0.02153633]
Kriging var: [ 0.02863649  0.02045773  0.02868729]
Sum of vars: [ 0.04586077  0.03734771  0.05022362]
Total var: [ 0.03464967  0.02676408  0.0367274 ]
Difference ratio vars: [ 0.32355599  0.3954412   0.3674702 ]
Elapsed time: 0.86 s

n_sample_gp: 500, max_budget: 50
Absolute error: [ 0.08067506  0.37576788  0.04101332]
MC var: [ 0.00172219  0.00199708  0.0023468 ]
Kriging var: [ 0.00782222  0.00335358  0.00417056]
Sum of vars: [ 0.00954441  0.00535066  0.00651736]
Total var: [ 0.0083698   0.0041262   0.00508305]
Difference ratio vars: [ 0.14033982  0.29675093  0.28217604]
Elapsed time: 16.20 s

n_sample_gp: 1000, max_budget: 50
Absolute error: [ 0.10259165  0.37066641  0.07039578]
MC var: [ 0.00093979  0.00105622  0.00124038]
Kriging var: [ 0.00589809  0.00197994  0.0

In [None]:
# Density of the estimated first-order index of one input variable, one panel
# per entry of `all_results` (filled by the experiment cell above).
i = 1  # 0-based index of the input variable to plot
n_results = len(all_results)

# squeeze=False makes plt.subplots always return a 2-D array of Axes; without
# it a single result yields a bare Axes object and `axes[k]` fails.
fig, axes = plt.subplots(n_results, 1, figsize=(8, 3*n_results), sharex=True, sharey=True, squeeze=False)
axes = axes[:, 0]
for k in range(n_results):
    ax = axes[k]
    # all_results[k][i] is 2-D: averaging over one axis leaves the spread
    # along the other one; the last curve pools every estimate.
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # kept here so the rendered figures do not change.
    sns.distplot(all_results[k][i, :, :].mean(axis=1), label='Var kriging', norm_hist=True, ax=ax)
    sns.distplot(all_results[k][i, :, :].mean(axis=0), label='Var MC', norm_hist=True, ax=ax)
    sns.distplot(all_results[k][i, :, :].reshape(-1, 1), label='Var Total', norm_hist=True, ax=ax)
    # Vertical marker at the true index value.
    ax.plot([ishigami_true_indices[i]]*2, [0., 30.], '.-y', linewidth=5)
    if k == 0:
        ax.set_title('Estimated indices for $S_%d$' % (i+1))
    ax.legend(loc=0)
    ax.axis('tight')
    ax.set_xlim([-0.1, 0.6])
    ax.set_ylabel('Density')
# Only the bottom panel gets an x label (the x axis is shared). A raw string
# is used so that `\h` is not parsed as an (invalid) escape sequence.
ax.set_xlabel(r'$\hat S_%d$' % (i+1))
fig.tight_layout()