In [1]:
import sys
sys.path.append('../../sparsedense')

In [2]:
import numpy as np
import networkx as nx
from networkx.algorithms.approximation import clustering_coefficient

import matplotlib.pyplot as plt
%matplotlib inline

import pickle

In [3]:
import sparsedensefast as sd

In [4]:
def clustering_stats(K, alpha, sigma, c):
    Z = sd.sample_from_ibp(K=K, alpha=alpha, sigma=sigma, c=c)
    adj = Z.transpose() @ Z
    g = nx.from_scipy_sparse_matrix(adj)
    for n in g.nodes():
        g.remove_edge(n, n)
    num_triangles = sum(nx.triangles(g).values())
    mean_triangles = num_triangles / g.number_of_nodes()
    clustering_coeff = clustering_coefficient.average_clustering(g)
    num_nodes = g.number_of_nodes()
    num_edges = g.number_of_edges()
    mean_deg = 2.0 * num_edges / num_nodes
    
    return num_triangles, mean_triangles, clustering_coeff, num_nodes, num_edges, mean_deg

In [5]:
def generate_stats(Klist, nreps, alpha, sigma, c):
    nk = len(Klist)
    numtr = np.zeros((nk, nreps), dtype=int)
    meantr = np.zeros((nk, nreps))
    clustco = np.zeros((nk, nreps))
    nedges = np.zeros((nk, nreps), dtype=int)
    nnodes = np.zeros((nk, nreps), dtype=int)
    meandeg = np.zeros((nk, nreps))

    for r in range(nreps):
        for i, K in enumerate(Klist):
            numtr[i, r], meantr[i, r], clustco[i, r], nnodes[i, r], nedges[i, r], meandeg[i, r] = clustering_stats(K, alpha, sigma, c)
            print("Finished rep {:2d} with K: {:6d} nt: {:7d}  mt: {:7.2f} cc: {:.2f} nv: {:6d} ne: {:7d} deg: {:.2f}"
                  .format(r + 1, K, numtr[i, r], meantr[i, r], clustco[i, r], nnodes[i, r], nedges[i, r], meandeg[i, r]))
    
    return numtr, meantr, clustco, nnodes, nedges, meandeg

# Experiments

In [6]:
alpha = 7
# sigma_list = [0.1, 0.25, 0.5, 0.75, 0.9]
sigma_list = [0.9]
c = 5.0
Klist = [int(10 ** x) for x in np.linspace(2, 5, 7)]
nreps = 5

for sigma in sigma_list:
    print("================== sigma:", str(sigma), "=============================")
    num_triang, mean_triang, clust_coeff, num_nodes, num_edges, mean_deg = generate_stats(Klist[:7], nreps, alpha, sigma, c)
    filename = 'stats_sigma_0_{}.pkl'.format(str(sigma)[2:])
    with open(filename, 'wb') as fn:
        pickle.dump((num_triang, mean_triang, clust_coeff, num_nodes, num_edges, mean_deg), fn)

Finished rep  1 with K:    100 nt:   15996  mt:   26.35 cc: 0.96 nv:    607 ne:    2392 deg: 7.88
Finished rep  1 with K:    316 nt:   58644  mt:   36.11 cc: 0.94 nv:   1624 ne:    7823 deg: 9.63
Finished rep  1 with K:   1000 nt:  168759  mt:   36.43 cc: 0.93 nv:   4632 ne:   22634 deg: 9.77
Finished rep  1 with K:   3162 nt:  500601  mt:   38.25 cc: 0.93 nv:  13089 ne:   65336 deg: 9.98
Finished rep  1 with K:  10000 nt: 2429361  mt:   66.35 cc: 0.94 nv:  36613 ne:  220045 deg: 12.02


KeyboardInterrupt: 