In [None]:
import logging
logging.basicConfig()
import variational_bayes as vb
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook
import sklearn.metrics
import networkx as nx
%matplotlib inline

In [None]:
def simulate(num_nodes=50, num_groups=3, seed=3, mask=None):
    """
    Simulate a network from a stochastic block model.

    Every node is assigned to a group uniformly at random. The connection probability between two
    groups is drawn from U(0, 0.05), and 0.2 is added for pairs of nodes within the same group.
    Each entry of the adjacency matrix is drawn independently; the matrix is not symmetrised.

    Parameters
    ----------
    num_nodes : int
        Number of nodes in the network.
    num_groups : int
        Number of groups.
    seed : int or None
        If not `None`, numpy's global random number generator is seeded with this value before
        anything is drawn.
    mask : scalar or None
        If not `None`, every diagonal entry of the adjacency matrix is overwritten with this value.
        Pass the same value to any consumer that is supposed to ignore the diagonal.

    Returns
    -------
    dict
        The arguments (`num_nodes`, `num_groups`, `seed`, `mask`) together with the group labels
        `z`, their encoding `onehot` as returned by `vb.onehot`, the group-level connection
        probabilities `proba`, and the `adjacency` matrix. The adjacency matrix is boolean if
        `mask` is `None` and a float array otherwise.
    """
    if seed is not None:
        np.random.seed(seed)

    # Group membership of each node
    z = np.random.choice(num_groups, num_nodes)
    onehot = vb.onehot(z, num_groups)

    # Weak background connectivity with stronger connectivity within groups
    proba = np.random.uniform(0, .05, (num_groups, num_groups))
    proba[np.diag_indices_from(proba)] += 0.2

    # Expand the group-level probabilities to all pairs of nodes and sample the edges
    _proba = proba[z[:, None], z[None, :]]
    adjacency = np.random.uniform(0, 1, _proba.shape) < _proba
    # Mask the diagonal of the array with a fixed value
    if mask is not None:
        # Cast first: assigning into a boolean array would silently coerce the mask to `True`
        # so that it could no longer be told apart from an edge
        adjacency = adjacency.astype(float)
        adjacency[np.diag_indices_from(adjacency)] = mask

    return {
        'num_nodes': num_nodes,
        'num_groups': num_groups,
        'seed': seed,
        'z': z,
        'onehot': onehot,
        'proba': proba,
        'adjacency': adjacency,
        'mask': mask,
    }

simulation = simulate(seed=3, mask=1e18)

# Build the graph with whichever constructor this networkx release provides: `from_numpy_matrix`
# was removed in networkx 3.0 and `from_numpy_array` does not exist before networkx 2.0
_graph_from_adjacency = getattr(nx, 'from_numpy_array', None) or nx.from_numpy_matrix
graph = _graph_from_adjacency(simulation['adjacency'])

# Drop the self-loops created by the masked diagonal. Newer networkx releases expose
# `selfloop_edges` as a module-level function returning a lazy iterator over the graph, so the
# edges are materialised as a list before the graph is modified
if hasattr(nx, 'selfloop_edges'):
    _selfloops = list(nx.selfloop_edges(graph))
else:
    _selfloops = list(graph.selfloop_edges())
graph.remove_edges_from(_selfloops)
pos = nx.spring_layout(graph)

# Left: group-level connection probabilities; right: the network coloured by true group
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(simulation['proba'])
nx.draw_networkx_edges(graph, pos, alpha=.5, ax=ax2)
nx.draw_networkx_nodes(graph, pos, node_color=simulation['z'], cmap='Set1', node_size=50, ax=ax2)

# `dict(...)` works for both the plain dictionary returned by networkx 1.x and the degree view
# returned by networkx 2.0 and later (the view has no `.values()` method)
print("Mean degree: %f" % (sum(dict(graph.degree()).values()) / simulation['num_nodes']))
print("Sizes: %s" % np.bincount(simulation['z']))

In [None]:
def stochastic_block_model(adjacency, num_groups, eps=0.5, mask=None):
    """
    Build a randomly initialised stochastic block model for an adjacency matrix.

    Parameters
    ----------
    adjacency : np.ndarray
        Two-dimensional adjacency matrix; it is converted to float before use.
    num_groups : int
        Number of groups of the model.
    eps : float
        Half-width of the interval around one from which the second parameter of the initial
        beta factor is drawn; the lower end of the interval is clipped at zero.
    mask : scalar or None
        Value marking entries to be left out of the mean used for initialisation. If a mask is
        given, the mixture distribution is created with `self_interaction=False`.

    Returns
    -------
    vb.InteractingMixtureModel
        Model with the factors `z` (group memberships) and `proba` (connection probabilities).
    """
    adjacency = adjacency.astype(float)
    num_nodes, _num_columns = adjacency.shape

    # Random soft assignment of every node: one draw from a flat Dirichlet per node
    memberships = np.random.dirichlet(np.ones(num_groups), num_nodes)
    q_z = vb.CategoricalDistribution(memberships)

    # Mean of the adjacency matrix, leaving out entries equal to the mask value if there is one
    if mask is None:
        observed = adjacency
    else:
        observed = adjacency[adjacency != mask]
    density = np.mean(observed)

    # The first parameter is the same for all pairs of groups, the second one is randomised
    shape = (num_groups, num_groups)
    q_proba = vb.BetaDistribution(
        np.full(shape, density),
        np.random.uniform(max(1 - eps, 0), 1 + eps, shape),
    )

    # Likelihood of the observed adjacency matrix given group memberships and probabilities
    edge_likelihood = vb.InteractingMixtureDistribution(
        q_z, vb.BernoulliDistribution(q_proba), self_interaction=mask is None
    ).likelihood(adjacency[..., None, None])
    # Uniform prior over the group memberships
    membership_prior = vb.CategoricalDistribution(np.ones(num_groups) / num_groups).likelihood(q_z)
    # Beta(0.5, 0.5) prior over the connection probabilities
    proba_prior = vb.BetaDistribution(.5, .5).likelihood(q_proba)

    return vb.InteractingMixtureModel(
        {'z': q_z, 'proba': q_proba},
        [edge_likelihood, membership_prior, proba_prior],
        ['z', 'proba'],
    )

In [None]:
# Set up an ensemble of randomly initialised block models for the simulated network
model_args = (simulation['adjacency'], simulation['num_groups'])
model_kwargs = {'mask': simulation['mask']}
ensemble = vb.ModelEnsemble(stochastic_block_model, model_args, model_kwargs)

# NOTE(review): the meaning of the positional arguments `50` and `None` is defined by
# `vb.ModelEnsemble.update`; presumably the number of models and a step limit -- confirm
convergence_predicate = vb.ConvergencePredicate(1e-3, 10)
best_model = ensemble.update(50, None, tqdm_notebook, num_processes=4,
                             convergence_predicate=convergence_predicate)

# Sorted ELBOs of the ensemble
plt.plot(np.sort(ensemble.elbos), marker='.')

In [None]:
model = best_model

# Left: posterior mean of the connection probabilities; right: posterior mean of the group
# memberships with the nodes (rows) sorted by their true group
fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True)
im = ax1.imshow(model['proba'].mean)
plt.colorbar(im, ax=ax1)
im = ax2.imshow(model['z'].mean[np.argsort(simulation['z'])], aspect='auto')
plt.colorbar(im, ax=ax2)
fig.tight_layout()

# Agreement between the true groups and the most likely inferred group of each node. The score
# is printed because only the last expression of a cell is displayed automatically
adjusted_rand_score = sklearn.metrics.adjusted_rand_score(
    simulation['z'], np.argmax(model['z'].mean, axis=1)
)
print("Adjusted Rand score: %f" % adjusted_rand_score)

# Compare the inferred connection probabilities with the ones used in the simulation
plt.figure()
vb.plot_comparison(model['proba'], simulation['proba'], marker='.')
plt.title('Probabilities')

In [None]:
# Candidate numbers of groups for model comparison
list_num_groups = [1, 2, 3, 4, 5, 6]
elbos = []
ensembles = []

# Shared axes for the sorted ELBOs of all ensembles
fig = plt.figure()
ax = fig.add_subplot(111)

for _num_groups in tqdm_notebook(list_num_groups):
    # Create the ensemble and update it. The mask is passed on so that the artificial values on
    # the diagonal of the adjacency matrix are ignored exactly as for the fit with the true
    # number of groups
    ensemble = vb.ModelEnsemble(stochastic_block_model, (simulation['adjacency'], _num_groups),
                                {'mask': simulation['mask']})
    ensemble.update(50, None, convergence_predicate=vb.ConvergencePredicate(1e-3, 10), num_processes=4)
    elbos.append(ensemble.best_elbo)
    ensembles.append(ensemble)

    # Plot the distribution of values
    ax.plot(np.sort(ensemble.elbos), label=str(_num_groups), marker='.')

    # Provide visualisation of the inferred assignments
    plt.figure()
    nx.draw_networkx_edges(graph, pos, alpha=.5)
    nx.draw_networkx_nodes(graph, pos, node_color=np.argmax(ensemble.best_model['z'].mean, axis=1),
                           cmap='Set1', node_size=50)
    plt.title(str(_num_groups))

ax.legend()

In [None]:
# Best ELBO for each number of groups, measured relative to the largest one
best_elbos = np.asarray(elbos)
relative_elbos = best_elbos - np.max(best_elbos)
plt.plot(list_num_groups, relative_elbos, marker='.')
# Vertical line at the number of groups used in the simulation
plt.axvline(simulation['num_groups'])
plt.xlabel('Number of groups')
plt.ylabel('ELBO - max(ELBO)')