In [1]:
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.patches as patches
from scipy import sparse
from scipy.optimize import linear_sum_assignment
import inference
import cluster
import generation
import metrics
from spectral_operators import BetheHessian

%load_ext autoreload
%autoreload 2

In [2]:
# Matplotlib settings

# plt.style.use('seaborn-whitegrid')

basic_line_color = '#808080'
black = '#222222'
darkgray = '#A9A9A9'
highlight = '#00B2EE'

#color0 = '#7570b3'
#color1 = '#d95f02'
#color3 = '#1b9e77'

color0 = '#1f78b4'
color1 = '#b2df8a'
color2 = '#a6cee3'

plt.rc('figure', figsize=(8, 5))
plt.rc('font', size=16)
plt.rc('font', family='sans-serif')
plt.rcParams['font.sans-serif'] = 'verdana'
plt.rcParams['lines.linewidth'] = 4
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.edgecolor'] = basic_line_color
plt.rcParams['xtick.color'] = basic_line_color
plt.rcParams['ytick.color'] = basic_line_color
plt.rcParams['axes.labelsize'] = 'large'
plt.rcParams['lines.markersize'] = 12

%config InlineBackend.figure_format = 'retina'

In [3]:
def best_score(score_mat, hier):
    """
    Pair rows of ``score_mat`` with its columns and report the paired scores.

    A one-to-one assignment between rows and columns that maximises the
    summed score is computed with ``linear_sum_assignment`` (on the negated
    matrix, since that routine minimises cost).

    Parameters
    ----------
    score_mat : 2-D array
        Score of every (row, column) pair; the shape is unpacked as
        (predicted levels, true levels).
    hier : iterable
        Its items must expose an attribute ``k``; item ``i`` belongs to row
        ``i`` of ``score_mat``.

    Returns
    -------
    scores : ndarray
        One entry per column: the score of the row assigned to that column,
        or 0 where no row was assigned.
    n_groups : ndarray
        One entry per column: ``k`` of the assigned row's item, or 0 where no
        row was assigned.
    """
    level_sizes = [part.k for part in hier]
    _, n_true = score_mat.shape

    matched_scores = np.zeros(n_true)
    matched_sizes = np.zeros(n_true)

    rows, cols = linear_sum_assignment(-score_mat)
    # Assigned columns are unique, so fancy-index assignment is unambiguous.
    matched_scores[cols] = score_mat[rows, cols]
    matched_sizes[cols] = [level_sizes[row] for row in rows]

    return matched_scores, matched_sizes

In [4]:
def evecs_from_BetheHessian(A, num_groups=-1, regularizer='BHa',
                              clustermode='KM', norm=False):
    """
    Collect eigenvectors from two Bethe Hessian operators built on ``A``.

    Two ``BetheHessian`` operators are constructed, one with ``regularizer``
    and one with ``regularizer + 'n'`` (the positive and negative variant).
    Each is asked for ``num_groups`` eigenvectors with ``which='SA'``. The
    eigenvectors of both are pooled, ordered by ascending eigenvalue, and the
    columns selected by ``order[:num_groups]`` are returned.

    ``clustermode`` and ``norm`` are accepted but not used by this function.

    NOTE(review): with the default ``num_groups=-1`` the final slice is
    ``order[:-1]``, i.e. every pooled eigenvector except the one with the
    largest eigenvalue. How ``find_k_eigenvectors`` interprets -1 is not
    visible here; confirm that this is intended.
    """
    # Build both variants first, then run the eigen-solver on each.
    operators = [BetheHessian(A, regularizer), BetheHessian(A, regularizer + 'n')]
    for operator in operators:
        operator.find_k_eigenvectors(num_groups, which='SA')

    # Pool eigenvectors (as columns) and eigenvalues in the same order.
    pooled_evecs = np.hstack([operator.evecs for operator in operators])
    pooled_evals = np.hstack([operator.evals for operator in operators])

    ascending = np.argsort(pooled_evals)
    return pooled_evecs[:, ascending[:num_groups]]

In [5]:
def runExp(n, c_bar, reps, snrs, sym, parameters, fileid='', reverse=False):
    """
    Sample networks over a range of SNR values and log recovery scores.

    For every value in ``snrs`` a hierarchical model is created with
    ``generation.create2paramGHRG`` (``sym`` true) or
    ``generation.createAsymGHRG`` (``sym`` false), and ``reps`` networks are
    sampled from it. For each sampled network:

    1. Bethe Hessian eigenvectors are computed once for the largest group
       count in the model, and ``cluster.find_partition`` is run with each
       true level's ``k``; the AMI against that true level is recorded.
    2. ``inference.infer_hierarchy`` is run, compared level-by-level with the
       model, and summarised by ``best_score`` and
       ``metrics.calculate_precision_recall``.

    One line per network is printed and appended to
    ``sym_results{fileid}.txt`` or ``asym_results{fileid}.txt`` in the current
    working directory. Every value is followed by a single space::

        snr precision recall n_rows <best_score scores> <best_score group counts> <AMIs with true k>

    where ``n_rows`` is the number of rows of the level comparison matrix.

    Parameters
    ----------
    n, c_bar
        Forwarded unchanged to the generator (presumably network size and
        mean degree; confirm in ``generation``).
    reps : int
        Number of networks sampled per SNR value.
    snrs : iterable
        SNR values to sweep.
    sym : bool
        Selects the generator and the prefix of the results file.
    parameters : dict
        Must contain ``'n_levels'``, ``'grps_per_lvl'`` and ``'BHnorm'``; the
        whole dict is also passed to ``inference.infer_hierarchy``.
    fileid : str
        Suffix inserted into the results file name.
    reverse : bool
        If true, each sampled adjacency matrix is replaced by the symmetrised
        strict upper triangle of its row-reversed version.
    """
    n_levels = parameters['n_levels']
    grps_per_lvl = parameters['grps_per_lvl']

    # Both choices depend only on the arguments, so resolve them once.
    results_file = f'sym_results{fileid}.txt' if sym else f'asym_results{fileid}.txt'
    create_model = generation.create2paramGHRG if sym else generation.createAsymGHRG

    for snr in snrs:
        print(f'SNR={snr}')
        dendro = create_model(n, snr, c_bar, n_levels, grps_per_lvl)
        num_groups = [part.k for part in dendro]
        k_max = np.max(num_groups)

        for _ in range(reps):
            Ag = dendro.sample_network()
            if reverse:
                # Flip the row order, keep the strict upper triangle, symmetrise.
                Ag = sparse.triu(Ag[::-1], 1)
                Ag += Ag.T

            # Clustering with the true number of groups at every level,
            # reusing one set of eigenvectors computed for the largest k.
            combined_evecs = evecs_from_BetheHessian(Ag, num_groups=k_max)
            amis = []
            for true_part in dendro:
                inf_part = cluster.find_partition(combined_evecs, true_part.k,
                                                  normalization=parameters['BHnorm'])
                amis.append(metrics.adjusted_mutual_info_score(
                    inf_part.pvec,
                    true_part.pvec_expanded,
                    average_method="arithmetic"))

            # Full hierarchy inference and its comparison with the model.
            hier = inference.infer_hierarchy(Ag, parameters=parameters)
            score_mat = metrics.calculate_level_comparison_matrix(hier, dendro)
            ls, ng = best_score(score_mat, hier)
            nl = score_mat.shape[0]
            p, r = metrics.calculate_precision_recall(score_mat)

            # Every field is followed by one space, including the last one.
            fields = [snr, p, r, nl, *ls, *ng, *amis]
            resultstr = ''.join(f'{field} ' for field in fields) + '\n'
            print(resultstr)

            # Append after every network so an interrupted run keeps its results.
            with open(results_file, 'a') as rfile:
                rfile.write(resultstr)


In [6]:
# Experiment configuration shared by the cells below.
c_bar = 50                       # passed to runExp as `c_bar`
n = 4 ** 7 + 4 ** 6              # passed to runExp as `n`
reps = 2                         # networks sampled per SNR value in one runExp call
snrs = np.linspace(0, 10, num=21)

# runExp reads 'n_levels', 'grps_per_lvl' and 'BHnorm' and forwards the whole
# dict to inference.infer_hierarchy; the remaining keys are presumably
# consumed there (not visible in this notebook).
parameters = {
    'reps': 20,
    'noise': 2e-2,
    'BHnorm': False,
    'Lnorm': False,
    'n_levels': 3,
    'grps_per_lvl': 2,
}

In [None]:
# 25 batches; each batch runs the sweep once on the sampled networks and once
# on their row-reversed counterparts. runExp appends to a separate results
# file per fileid, so the batches accumulate.
for rr in range(25):
    for fileid, reverse in (('useW', False), ('useW_and_reverse', True)):
        runExp(n, c_bar, reps, snrs, True, parameters, fileid, reverse)

In [7]:
def read_scores(sym, fileid, results_dir='results_me'):
    """
    Load a results file written by ``runExp`` and average it per SNR value.

    The file ``{results_dir}/sym_results{fileid}.txt`` (``sym`` true) or
    ``{results_dir}/asym_results{fileid}.txt`` (``sym`` false) is read as
    whitespace-separated numbers, one row per line; blank lines are ignored.
    Rows are grouped by their first column (the SNR) and averaged column-wise.

    NOTE(review): ``runExp`` appends its files to the current working
    directory, whereas this function reads from ``results_dir``; the files
    have to be placed there (or ``results_dir`` pointed at them).

    Parameters
    ----------
    sym : bool
        Selects the ``sym_`` or ``asym_`` file prefix.
    fileid : str
        Suffix of the results file name.
    results_dir : str, optional
        Directory containing the results file.

    Returns
    -------
    precision, recall, n_levels : ndarray, shape (n_snr,)
        Per-SNR means of columns 1, 2 and 3 of the file.
    lvl_scores, n_groups, lvl_scores_with_k : ndarray, shape (n_snr, 3)
        Per-SNR means of columns 4-6, 7-9 and 10-12 of the file (0-based,
        counting the SNR as column 0). The fixed width of three matches the
        three-level hierarchy configured in this notebook.

    Raises
    ------
    ValueError
        If the file contains no data rows.
    """
    prefix = 'sym' if sym else 'asym'
    results_file = f'{results_dir}/{prefix}_results{fileid}.txt'

    with open(results_file) as rfile:
        # Skip blank lines so a stray newline does not make the table ragged.
        rows = [[float(token) for token in line.split()]
                for line in rfile if line.strip()]
    if not rows:
        raise ValueError(f'no results found in {results_file}')
    results = np.array(rows, dtype=np.float64)

    # Average all rows sharing an SNR value; the SNR column itself is dropped.
    snr_column = results[:, 0]
    mean_scores = np.array([results[snr_column == snr, 1:].mean(axis=0)
                            for snr in np.unique(snr_column)])

    precision = mean_scores[:, 0]
    recall = mean_scores[:, 1]
    n_levels = mean_scores[:, 2]
    lvl_scores = mean_scores[:, 3:6]
    n_groups = mean_scores[:, 6:9]
    lvl_scores_with_k = mean_scores[:, 9:12]

    return precision, recall, n_levels, lvl_scores, n_groups, lvl_scores_with_k

In [None]:
def _mark_detectability_limit(ax, label_y):
    """Shade the band left of SNR = 1 on ``ax`` and label it at height ``label_y``."""
    # The patch is taller than every panel's y-range, so it always fills the band.
    ax.add_patch(patches.Rectangle((-0.5, -0.05), 1.5, 1.5, color='black', alpha=0.1))
    ax.text(1, label_y, 'Detectability Limit', rotation=90, va='center',
            ha='right', fontstyle='italic', alpha=0.5)


def _add_panel_letter(ax, letter):
    """Put a bold panel letter just outside the top-left corner of ``ax``."""
    ax.annotate(letter, (-0.13, 1), xycoords='axes fraction', va='center',
                size=26, weight='bold')


def _plot_precision_recall(ax, snrs, precision, recall, n_levels, title, letter):
    """Draw precision and recall against SNR, with the level count on a twin y-axis."""
    ax.plot(snrs, precision, color=color0, label='Precision')
    ax.plot(snrs, recall, color=color2, label='Recall')
    ax.set_ylabel('AMI')
    ax.set_title(title)
    ax.legend(frameon=False)
    _add_panel_letter(ax, letter)
    _mark_detectability_limit(ax, 0.78)
    ax.set_ylim(-0.05, 1.18)
    ax.set_xlim(-0.5, 10.5)

    levels_ax = ax.twinx()
    levels_ax.plot(snrs, n_levels, '*', color=color1)
    levels_ax.set_ylabel('# of Levels', color=color1)
    # Right spines are switched off globally in rcParams; the twin axis needs one.
    levels_ax.spines['right'].set_visible(True)
    levels_ax.tick_params(axis='y', labelcolor=color1)
    levels_ax.set_ylim(-0.05, 3.5)
    return levels_ax


def _plot_level_scores(ax, snrs, lvl_scores, lvl_scores_with_k, colors, group_sizes, letter):
    """Draw per-level scores against SNR.

    Solid lines show ``lvl_scores``; dotted lines show ``lvl_scores_with_k``
    (the columns runExp fills with AMIs from clustering with each true k).
    """
    for i, (color, k) in enumerate(zip(colors, group_sizes)):
        ax.plot(snrs, lvl_scores[:, i], color=color, label=f'k={k:3}')
        ax.plot(snrs, lvl_scores_with_k[:, i], color=color, ls=':')
    ax.set_xlabel('SNR')
    ax.set_ylabel('AMI')
    ax.legend(frameon=False)
    _mark_detectability_limit(ax, 0.7)
    ax.set_ylim(-0.05, 1.05)
    ax.set_xlim(-0.5, 10.5)
    _add_panel_letter(ax, letter)


# Both result sets come from the sym=True generator; the second one uses the
# row-reversed networks (see the reverse flag of runExp).
fileid = 'useW'
precision, recall, n_levels, lvl_scores, n_groups, lvl_scores_with_k = read_scores(True, fileid)
fileid = 'useW_and_reverse'
precision_a, recall_a, n_levels_a, lvl_scores_a, n_groups_a, lvl_scores_with_k_a = read_scores(True, fileid)

fig, axs = plt.subplots(2, 2, figsize=(16, 9), sharex=True)

colors = [color0, color1, color2]
group_sizes = [2, 4, 8]

# The x-values are the module-level `snrs` grid, which must match the SNR
# values stored in the results files.

# Left column: networks as sampled.
_plot_precision_recall(axs[0, 0], snrs, precision, recall, n_levels,
                       'Assortative Hierarchy', 'A')
_plot_level_scores(axs[1, 0], snrs, lvl_scores, lvl_scores_with_k,
                   colors, group_sizes, 'C')

# Right column: row-reversed networks.
_plot_precision_recall(axs[0, 1], snrs, precision_a, recall_a, n_levels_a,
                       'Disassortative Hierarchy', 'B')
_plot_level_scores(axs[1, 1], snrs, lvl_scores_a, lvl_scores_with_k_a,
                   colors, group_sizes, 'D')

plt.tight_layout()