In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from results_summary import load_all_models

In [2]:
from tqdm import tqdm_notebook as tqdm
# from tqdm import tqdm

In [3]:
# Keyword settings forwarded verbatim to `load_all_models` (third argument).
# NOTE(review): the meaning of each key is defined by `results_summary`,
# which is not shown here -- confirm there before changing any value.
dl_args = {
    "npoint": 300,
    "nlayer": 3,
    "nneuron": 30,
    "init_log_lam": -3,
    "points_std": 0.0,
    "keep_prob": 1.0,
    "init_weight_std": 1.0,
    "noise_std": 0.05,
    "points_type": "opt",
    "log_lam_weights": -6,
    "step_size": 1e-2,
    "mixture_kernel": False,
    "init_log_sigma": np.linspace(0, 1, 3),
    "base": True,
    "niter": 5000,
    "ntrain": 100,
    "nvalid": 100,
    "patience": 50,
    "gpu_count": 0,
    "clip_score": True,
    "trainable": False,
}

# Keyword settings forwarded verbatim to `load_all_models` (fourth argument).
others_args = {
    "n_hiddens": [100, 100],
    "n_comps": 10,
    "n_layers": 5,
    "act_fun": "tanh",
    "mode": "random",
}

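`load_all_models` returns four objects, named as follows in the code below:

* `p` is the data object
* `m` is a dict of all models, keyed by model name
* `l` is a dict of log-likelihoods
* `s` is a dict of samples (not for dkef)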

In [4]:
from kgof import density, data, kernel
from kmod import mctest
from itertools import combinations

def run_all_steins(data_name, seeds=range(15), save=True):
    """Run the pairwise ``mctest.DC_FSSD`` test between all models, per seed.

    For every seed the models are loaded with ``load_all_models`` (using the
    notebook-level ``dl_args`` and ``others_args``), a density is built from
    each model's ``grad``, and every unordered pair of models is compared on
    the test split.

    Parameters
    ----------
    data_name
        Dataset name passed to ``load_all_models``; also used to build the
        output file name.
    seeds : iterable
        Seeds passed to ``load_all_models``, one run per seed. Any iterable
        (including a generator) is accepted; it is materialized once.
    save : bool
        If true, write ``ps`` and ``stats`` to ``stein_<data_name>.npz`` in
        the current working directory.

    Returns
    -------
    ps, stats : ndarray, shape ``(len(seeds), n_models, n_models)``
        Models are indexed in ``sorted(m)`` order. For ``i < j``,
        ``ps[s, i, j]`` is the test's ``'pvalue'`` and ``stats[s, i, j]`` its
        ``'test_stat'``; the mirrored entries hold ``1 - pvalue`` and
        ``-test_stat``. The diagonal is left as NaN. If ``seeds`` is empty,
        both are ``None``.

    Raises
    ------
    AssertionError
        If the set of model names differs between seeds.
    """
    # Materialize once: `len(seeds)` below needs a sized container, and a
    # generator would otherwise be exhausted by the loop.
    seeds = list(seeds)

    model_names = None
    ps = None
    stats = None
    k = kernel.KPoly(degree=3)

    for seed_i, seed in enumerate(tqdm(seeds, unit="seed")):
        p, m, l, s = load_all_models(data_name, seed, dl_args, others_args)
        if model_names is None:
            # The first seed fixes the model ordering and the result shapes.
            model_names = sorted(m)
            n = len(model_names)

            ps = np.full((len(seeds), n, n), np.nan)
            stats = np.full_like(ps, np.nan)
        else:
            assert model_names == sorted(m)

        test_data = data.Data(p.test_data)

        # Test locations: 100 test points drawn with replacement (the default
        # of `choice`), then jittered with Gaussian noise. Fancy indexing
        # returns a copy, so the in-place add does not touch `p.test_data`.
        # NOTE(review): the RNG is seeded with the loop index `seed_i`, not
        # with `seed` itself -- confirm this is intended.
        rs = np.random.RandomState(seed=seed_i)
        V = p.test_data[rs.choice(p.test_data.shape[0], 100)]
        V += rs.normal(scale=.2, size=V.shape)

        # `.items()` works on both Python 2 and 3; `.iteritems()` is
        # Python-2-only and raises AttributeError on Python 3 dicts.
        densities = {name: density.from_grad_log(p.D, model.grad)
                     for name, model in m.items()}

        for i, j in tqdm(list(combinations(range(n), 2)), unit="test"):
            d1 = densities[model_names[i]]
            d2 = densities[model_names[j]]
            res = mctest.DC_FSSD(d1, d2, k, k, V, V).perform_test(test_data)
            # Only (i, j) with i < j is tested; the swapped pair is filled in
            # as the complement / negation of that result.
            ps[seed_i, i, j] = res['pvalue']
            ps[seed_i, j, i] = 1 - res['pvalue']
            stats[seed_i, i, j] = res['test_stat']
            stats[seed_i, j, i] = -res['test_stat']

    if save:
        np.savez('stein_{}.npz'.format(data_name), ps=ps, stats=stats)
    return ps, stats

In [None]:
# Run every pairwise test on the 'redwine' dataset with the default seeds;
# with the default save=True the results go to stein_redwine.npz.
run_all_steins(data_name='redwine')