In [None]:
import logging
logging.basicConfig()
import variational_bayes as vb
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook
import sklearn.metrics
import networkx as nx
%matplotlib inline

In [None]:
# Simulate a small graph with planted group structure to use as ground truth.
np.random.seed(3)  # seed the global NumPy RNG so the draws below are reproducible
num_nodes = 50
num_groups = 3

# True group label of every node, drawn uniformly from {0, ..., num_groups - 1}.
z = np.random.choice(num_groups, num_nodes)

# Group-by-group connection probabilities: a weak background (< 0.05) everywhere
# plus 0.3 on the diagonal, so within-group edges are much more likely.
proba = np.random.uniform(0, .05, (num_groups, num_groups))
proba[np.diag_indices_from(proba)] += 0.3

# Expand to a (num_nodes, num_nodes) matrix of pairwise probabilities and sample
# every entry independently; (i, j) and (j, i) are separate draws, so the
# resulting boolean matrix is not forced to be symmetric.
_proba = proba[z[:, None], z[None, :]]
adjacency = np.random.uniform(0, 1, _proba.shape) < _proba
# NOTE(review): presumably a one-hot encoding of `z`; not used by the other cells shown.
onehot = vb.onehot(z, num_groups)

# `nx.from_numpy_matrix` was removed in NetworkX 3.0; `from_numpy_array` is the
# supported equivalent and accepts the same 2-D array.
graph = nx.from_numpy_array(adjacency)
pos = nx.spring_layout(graph)

# Left: the true block probabilities. Right: the sampled graph coloured by true group.
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(proba)
nx.draw_networkx_edges(graph, pos, alpha=.5, ax=ax2)
nx.draw_networkx_nodes(graph, pos, node_color=z, cmap='Set1', node_size=50, ax=ax2)

# Number of nonzero adjacency entries per node, and the size of each true group.
print("Mean degree: %f" % (np.sum(adjacency) / num_nodes))
print("Sizes: %s" % np.bincount(z))

In [None]:
def stochastic_block_model(adjacency, num_groups, eps=0.1):
    """Build a randomly initialised variational stochastic block model.

    Parameters
    ----------
    adjacency : array-like with a two-element ``shape``
        Adjacency matrix of the graph; it is cast to ``float`` before use.
    num_groups : int
        Number of groups in the model.
    eps : float, optional
        Half-width of the interval around 1 from which the second parameter of
        the Beta factor is drawn; the lower bound is clipped at 0.

    Returns
    -------
    vb.InteractingMixtureModel
        Model with factors ``'z'`` (categorical, one row per node) and
        ``'proba'`` (Beta, one entry per pair of groups).

    Notes
    -----
    The initial values are drawn from the global NumPy RNG, so repeated calls
    give different starting points unless the seed is reset.
    """
    adjacency = adjacency.astype(float)
    num_nodes, _ = adjacency.shape
    block_shape = (num_groups, num_groups)

    # Random starting memberships: one Dirichlet(1, ..., 1) draw per node.
    initial_membership = np.random.dirichlet(np.ones(num_groups), num_nodes)
    q_z = vb.CategoricalDistribution(initial_membership)

    # First Beta parameter is the observed edge density for every block; the
    # second is jittered around 1 to make the starting point random.
    beta_first = np.full(block_shape, np.mean(adjacency))
    beta_second = np.random.uniform(max(1 - eps, 0), 1 + eps, block_shape)
    q_proba = vb.BetaDistribution(beta_first, beta_second)

    # Term for the observed adjacency under the mixture of Bernoulli blocks.
    # NOTE(review): the two trailing axes presumably broadcast against the
    # (num_groups, num_groups) block parameters - confirm against `vb`.
    edge_term = vb.InteractingMixtureDistribution(
        q_z, vb.BernoulliDistribution(q_proba)
    ).likelihood(adjacency[..., None, None])
    # Uniform categorical over groups applied to the memberships.
    membership_term = vb.CategoricalDistribution(
        np.ones(num_groups) / num_groups
    ).likelihood(q_z)
    # Beta(1, 1) applied to the block probabilities.
    block_term = vb.BetaDistribution(1, 1).likelihood(q_proba)

    factors = {'z': q_z, 'proba': q_proba}
    likelihoods = [edge_term, membership_term, block_term]
    return vb.InteractingMixtureModel(factors, likelihoods, ['z', 'proba'])

In [None]:
# Fit an ensemble of models built by `stochastic_block_model` with eps=0.5 and
# keep whatever `update` returns as the best model.
# NOTE(review): the meaning of the positional arguments `10` and `None` is
# defined by `vb.ModelEnsemble.update` - confirm against the library.
ensemble = vb.ModelEnsemble(
    stochastic_block_model,
    (adjacency, num_groups, 0.5),
)
best_model = ensemble.update(
    10,
    None,
    tqdm_notebook,
    convergence_predicate=vb.ConvergencePredicate(1e-3, 10),
)
# Last expression of the cell, so the notebook displays the ELBO of the best model.
best_model.elbo

In [None]:
# Inspect the fitted factors of the best model and score the recovered grouping.
model = best_model

fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True)

# Left panel: mean of the 'proba' factor (one entry per pair of groups).
im = ax1.imshow(model['proba'].mean)
ax1.set_title('Mean of q(proba)')
plt.colorbar(im, ax=ax1)

# Right panel: mean of the 'z' factor with rows reordered by true label, so
# nodes from the same true group appear as contiguous bands.
im = ax2.imshow(model['z'].mean[np.argsort(z)], aspect='auto')
ax2.set_title('Mean of q(z), nodes sorted by true group')
plt.colorbar(im, ax=ax2)

fig.tight_layout()

# Keep the score as the LAST expression of the cell: Jupyter only displays the
# value of the final expression, so computing it before `fig.tight_layout()`
# silently discarded it. The adjusted Rand index ignores label permutations, so
# the inferred group ids do not need to be matched to the true ones.
sklearn.metrics.adjusted_rand_score(z, np.argmax(model['z'].mean, axis=1))

In [None]:
# Refit the model for a range of candidate group counts and record the best
# ELBO reached by each ensemble.
list_num_groups = list(range(1, 7))
elbos = []
for _num_groups in tqdm_notebook(list_num_groups):
    # No `eps` is supplied here, so the factory's default applies if the tuple
    # is forwarded as positional arguments (unverified).
    ensemble = vb.ModelEnsemble(
        stochastic_block_model,
        (adjacency, _num_groups),
    )
    # A fresh convergence predicate is built for every ensemble on purpose:
    # it may carry state between calls, so it is not shared across iterations.
    ensemble.update(
        20,
        None,
        convergence_predicate=vb.ConvergencePredicate(1e-3, 10),
    )
    elbos.append(ensemble.best_elbo)

In [None]:
# Plot each ELBO relative to the best one, so the maximum sits at zero, and mark
# the number of groups used to simulate the data with a vertical line.
elbo_offsets = np.asarray(elbos) - np.max(elbos)
plt.plot(list_num_groups, elbo_offsets, marker='.')
plt.axvline(num_groups)
plt.xlabel('Number of groups')
plt.ylabel('ELBO - max(ELBO)')