In [None]:
import variational_bayes as vb
import numpy as np
from matplotlib import pyplot as plt, rcParams
import scipy.stats
from tqdm import tqdm_notebook
from sklearn import metrics
%matplotlib inline

# Notebook-wide plotting defaults: higher-resolution figures and a dot marker
# for scatter plots.
rcParams.update({'figure.dpi': 144, 'scatter.marker': '.'})
# Fix the state of NumPy's global random generator so the np.random draws in the
# cells below are repeatable after a fresh "Restart & Run All".
np.random.seed(1)

In [None]:
# How often is the sample covariance of standard-normal data positive definite
# when the number of samples is close to the number of dimensions?
list_sizes = [20, 50, 100]
# Sample counts are expressed as a fraction of the dimension. Neither the
# fractions nor the number of repetitions depend on `size`, so they are set once.
list_fraction = np.linspace(0.9, 1.1, 21)
num_runs = 100
for size in list_sizes:
    list_num_samples = list_fraction * size
    # is_pd[i][j] holds the result of vb.is_positive_definite for the j-th
    # repetition with list_num_samples[i] samples. (This list used to be called
    # `pd`, which shadows the conventional pandas alias.)
    is_pd = []
    for num_samples in list_num_samples:
        # int() truncates, so fractional sample counts are rounded down.
        num_samples = int(num_samples)
        is_pd.append([
            vb.is_positive_definite(
                np.cov(np.random.normal(0, 1, (num_samples, size)), rowvar=False))
            for _ in range(num_runs)
        ])

    # Error bars are the standard error of the mean over the repetitions:
    # population std / sqrt(n - 1) equals sample std / sqrt(n).
    plt.errorbar(list_fraction, np.mean(is_pd, axis=1), np.std(is_pd, axis=1) / np.sqrt(num_runs - 1),
                 marker='.', label=str(size))

plt.xlabel('Number of samples / number of dimensions')
plt.ylabel('Fraction of positive-definite covariances')
plt.legend(title='Dimensions')

In [None]:
# Problem dimensions read by the simulation, inference and plotting cells:
#   num_groups - number of groups the nodes are assigned to
#   num_nodes  - number of nodes in the simulated system
#   order      - lag order handed to the vb VAR routines
num_groups, num_nodes, order = 3, 50, 2

In [None]:
# Eleven series-length fractions, doubling from 0.025 up to 0.025 * 2**10 = 25.6.
fractions = 0.025 * np.power(2, np.arange(11))
# Series length for each fraction: the fraction times the number of entries in
# the (num_nodes, num_nodes * order + 1) coefficient matrix. The values stay
# floats here; they are truncated with int() where the series is simulated.
list_num_steps = fractions * (num_nodes * (num_nodes * order + 1))
list_num_steps

In [None]:
def simulate_parameters():
    """
    Draw one random set of ground-truth parameters for a grouped VAR process.

    The function takes no arguments: it reads the notebook-level globals
    `num_groups`, `num_nodes` and `order`, and draws from NumPy's global random
    state (`scipy.stats.wishart.rvs` is called without an explicit
    `random_state`), so the result depends on the current seed and on how many
    random numbers have been drawn before.

    Returns
    -------
    dict
        'z' : int array, shape (num_nodes,)
            Group index of every node.
        'noise_precision' : array, shape (num_groups,)
            One noise precision per group (index with `z` to get one per node).
        'adjacency' : array, shape (num_nodes, num_nodes, order)
            Autoregressive coefficients for every pair of nodes.
        'bias' : array, shape (num_nodes,)
            Bias of every node.
        'coefficients' :
            Result of `vb.pack_coefficients(adjacency, bias)`.
    """
    # Group proportions and the group of every node. Original author's note on
    # the concentration of 100: "was 10 when things didn't work".
    density = np.random.dirichlet(100 * np.ones(num_groups))
    z = np.random.choice(num_groups, num_nodes, p=density)

    # Noise precision of every group (gamma with shape 5000 and unit scale)
    noise_precision = np.random.gamma(5000, size=num_groups)

    # Mean and precision matrix of the autoregressive coefficients for every
    # ordered pair of groups
    adjacency_mean = np.random.normal(0, 1e-2, size=(num_groups, num_groups, order))
    adjacency_precision = scipy.stats.wishart.rvs(1e5, np.eye(order), size=(num_groups, num_groups))
    if adjacency_precision.ndim < 4:
        # The trailing (order, order) matrix axes are missing (this looks like
        # the order == 1 case, where scipy squeezes the 1x1 matrices); restore
        # them so the linear algebra below sees a stack of matrices.
        adjacency_precision = adjacency_precision.reshape((num_groups, num_groups, 1, 1))

    # Mean and precision of the bias for every group
    bias_mean = np.random.normal(0, 0.1, num_groups)
    bias_precision = np.random.gamma(1e4, 1, num_groups)

    # Sample the autoregressive coefficients of every node pair: group mean plus
    # the Cholesky factor of the group-pair covariance applied to standard
    # normal noise.
    cholesky = np.linalg.cholesky(np.linalg.inv(adjacency_precision))
    cholesky = cholesky[z[:, None], z[None, :]]
    adjacency = adjacency_mean[z[:, None], z[None, :]] + \
        np.einsum('...ij,...j', cholesky, np.random.normal(0, 1, (num_nodes, num_nodes, order)))

    # Sample the bias of every node around the mean of its group
    bias = np.random.normal(0, 1, num_nodes) / np.sqrt(bias_precision[z]) + bias_mean[z]

    # Construct the coefficients for comparison with the fitted models
    coefficients = vb.pack_coefficients(adjacency, bias)

    return {
        'coefficients': coefficients,
        'bias': bias,
        'adjacency': adjacency,
        'z': z,
        'noise_precision': noise_precision,
    }

In [None]:
# Benchmark: for every simulated parameter set and every series length, fit one
# VAR model without group structure and one with it, and collect the coefficient
# means and standard deviations of both (plus the `z` means of the second model).
num_runs = 2
list_means, list_stds = [], []      # model without hierarchical structure
list_means2, list_stds2 = [], []    # model with hierarchical structure
list_coefficients, list_parameters, list_zs = [], [], []

for run in tqdm_notebook(range(num_runs)):
    parameters = simulate_parameters()
    list_parameters.append(parameters)
    list_coefficients.append(parameters['coefficients'])

    means, stds = [], []
    means2, stds2 = [], []
    zs = []
    for num_steps in tqdm_notebook(list_num_steps):
        # Every node gets the noise precision of the group it belongs to;
        # the (float) number of steps is truncated to an integer.
        node_noise_precision = parameters['noise_precision'][parameters['z']]
        series = vb.simulate_series(parameters['bias'], parameters['adjacency'],
                                    node_noise_precision, int(num_steps))

        # One row of num_nodes * order + 1 coefficients per node
        num_features = num_nodes * order + 1
        coefficient_factor = vb.MultiNormalDistribution(
            np.zeros((num_nodes, num_features)),
            np.ones((num_nodes, 1, 1)) * np.eye(num_features)
        )
        noise_factor = vb.GammaDistribution(
            1e-3 * np.ones(num_nodes),
            1e-3 * np.ones(num_nodes)
        )
        factors = {
            'coefficients': coefficient_factor,
            'noise_precision': noise_factor,
        }

        # Terms of the model: the VAR term evaluated on the series, a
        # Gamma(1e-6, 1e-6) term on the noise precision, and a zero-mean normal
        # term (second argument 1e-100 * identity) on the coefficients.
        var_term = vb.VARDistribution(coefficient_factor, noise_factor).likelihood(
            vb.VARDistribution.summary_statistics(series, order)
        )
        noise_term = vb.GammaDistribution(1e-6, 1e-6).likelihood(noise_factor)
        coefficient_term = vb.MultiNormalDistribution(
            np.zeros(num_features),
            np.eye(num_features) * 1e-100
        ).likelihood(coefficient_factor)
        likelihoods = [var_term, noise_term, coefficient_term]

        # Model without hierarchical structure
        model = vb.Model(factors, likelihoods)
        model.update(None, convergence_predicate=1e-3)
        means.append(model['coefficients'].mean)
        stds.append(model['coefficients'].std)

        # Model with hierarchical structure
        model2 = vb.var_model(series, order, num_groups, shared_noise=False)
        model2.update(None, convergence_predicate=1e-3)
        means2.append(model2['coefficients'].mean)
        stds2.append(model2['coefficients'].std)
        zs.append(model2['z'].mean)

    list_means.append(means)
    list_stds.append(stds)
    list_means2.append(means2)
    list_stds2.append(stds2)
    list_zs.append(zs)

# Stack the per-run lists into arrays whose first axis is the run and, for the
# fitted quantities, whose second axis is the series length.
list_means, list_stds, list_means2, list_stds2, list_coefficients, list_zs = (
    np.asarray(collected)
    for collected in (list_means, list_stds, list_means2, list_stds2, list_coefficients, list_zs)
)

In [None]:
# Summary figure. Dashed lines: model without hierarchical structure; solid
# lines: model with hierarchical structure; one colour per run.
# `sharex` must be passed by keyword: current Matplotlib releases reject it as a
# positional argument.
fig, [(ax1, ax2), (ax3, ax4)] = plt.subplots(2, 2, sharex=True)

threshold = 2
flat_shape = (num_runs, fractions.size, num_nodes * num_nodes * order)

# Top left: fraction of coefficients whose |mean / std| exceeds the threshold.
# The first entry along the last axis is skipped (presumably the bias term
# packed by vb.pack_coefficients -- confirm).
list_zscores = (list_means / list_stds)[..., 1:].reshape(flat_shape)
list_zscores2 = (list_means2 / list_stds2)[..., 1:].reshape(flat_shape)
for zscores_by_run, ls in [(list_zscores, '--'), (list_zscores2, '-')]:
    for i, zscores in enumerate(zscores_by_run):
        ax1.plot(fractions, np.mean(np.abs(zscores) > threshold, axis=-1), color='C%d' % i, ls=ls)

ax1.set_xscale('log')
ax1.set_ylabel('Significant coefficients')
ax1.axvline(1, ls=':')

# Top right: root-mean-square error of the coefficient means with respect to the
# simulated coefficients. The square root is required for the values to match
# the "RMSE" label; without it the plot shows the mean squared error.
for estimates, ls in [(list_means, '--'), (list_means2, '-')]:
    residuals = (estimates - list_coefficients[:, None])[..., 1:]
    list_rmse = np.sqrt(np.mean(residuals ** 2, axis=(2, 3)))
    for i, rmse in enumerate(list_rmse):
        ax2.plot(fractions, rmse, color='C%d' % i, ls=ls)

ax2.set_ylabel('RMSE')
ax2.set_yscale('log')

# Bottom left: normalized mutual information between the simulated groups and
# the most probable inferred group of every node. The `rand_scores` names are
# kept because a later cell reads `list_rand_scores`; the values are NMI scores.
list_rand_scores = []
for parameters, zs in zip(list_parameters, list_zs):
    rand_scores = [metrics.normalized_mutual_info_score(parameters['z'], np.argmax(z, axis=1)) for z in zs]
    list_rand_scores.append(rand_scores)

for i, rand_scores in enumerate(list_rand_scores):
    ax3.plot(fractions, rand_scores, color='C%d' % i)

ax3.set_xlabel('Fraction')
ax3.set_ylabel('NMI')
# The bottom-right panel is intentionally left without data.
ax4.set_xlabel('Fraction')

fig.tight_layout()

In [None]:
# Inspect the NMI scores as an array with one row per run and one column per
# series length (the values are NMI scores despite the `rand` in the name).
np.asarray(list_rand_scores)

In [None]:
# Scratch cell intentionally left without code: the bare name `rand` that used
# to be here is not defined anywhere in this notebook, so it raised NameError
# and stopped "Restart & Run All" before the final figure.

In [None]:
# Shape of `zs` as left behind by the most recently executed loop that bound it;
# after the summary-figure cell that is the last entry of `list_zs`.
zs.shape

In [None]:
# Simulated group index of every node for the most recently bound `parameters`.
parameters['z']

In [None]:
# Simulated coefficients against fitted coefficient means for the first run at
# the shortest series length (index [0, 0]). Both panels get the same aspect
# ratio and labels so that they can be compared directly.
fig, (ax1, ax2) = plt.subplots(1, 2)
true_coefficients = list_coefficients[0].ravel()
for ax, estimates, title in [
    (ax1, list_means, 'Without hierarchical structure'),
    (ax2, list_means2, 'With hierarchical structure'),
]:
    ax.scatter(true_coefficients, estimates[0, 0].ravel())
    ax.set_aspect('equal')
    ax.set_xlabel('Simulated coefficient')
    ax.set_title(title)
ax1.set_ylabel('Fitted coefficient mean')
fig.tight_layout()