In [None]:
import matplotlib.pyplot as plt

# Font sizes (in points) shared by every figure in this notebook.
SMALL_SIZE = 12
MEDIUM_SIZE = 14
BIGGER_SIZE = 16

# Default text size.
plt.rc('font', size=SMALL_SIZE)
# Axes title and x/y axis labels.
plt.rc('axes', titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)
# Tick labels on both axes.
for tick_group in ('xtick', 'ytick'):
    plt.rc(tick_group, labelsize=SMALL_SIZE)
# Legend entries.
plt.rc('legend', fontsize=SMALL_SIZE)
# Figure-level title (suptitle).
plt.rc('figure', titlesize=BIGGER_SIZE)

%matplotlib inline

In [None]:
import numpy as np
import networkx as nx
from collections import Counter

In [None]:
import sys
sys.path.append('../src')
from random_nested_hypergraph import random_nested_hypergraph
from random_degree_model import random_degree_hypergraph
from encapsulation_dag import encapsulation_dag, overlap_dag, overlap_graph

In [None]:
# Parameters handed to `random_nested_hypergraph` through the get_plot_* helpers
# defined below (N, max_size, H, epsilons positionally; max_size_overlap by keyword).
# Note: `N` is reassigned to 25 further down for the random-degree model, and
# `epsilons` is reassigned inside the plotting/simulation loops, so the dict
# below is only the initial value.
N = 20
max_size = 4
H = 5
# Keyed by hyperedge size (2, 3, 4) -- presumably one epsilon per size; confirm
# against random_nested_hypergraph.
epsilons = {2: 1.0, 3: 1.0, 4:1.0}
max_size_overlap = 1

In [None]:
def get_plot_dag(N, max_size, H, epsilons, max_size_overlap=-1):
    """Sample a random nested hypergraph and return its encapsulation DAG.

    All arguments are forwarded unchanged to `random_nested_hypergraph`.

    Every DAG node is tagged with a "subset" attribute equal to `len(node)`;
    "subset" is the attribute name `nx.multipartite_layout` reads by default,
    so the DAG can be laid out in layers by node length.

    Returns
    -------
    (dag, hyperedges)
        The annotated DAG and the sampled hyperedges it was built from.
    """
    hyperedges = random_nested_hypergraph(
        N, max_size, H, epsilons, max_size_overlap=max_size_overlap
    )
    # encapsulation_dag returns three values; only the graph is needed here.
    dag, _, _ = encapsulation_dag(hyperedges)
    layer_of = {node: len(node) for node in dag.nodes()}
    nx.set_node_attributes(dag, layer_of, name="subset")
    return dag, hyperedges

In [None]:
def get_plot_overdag(N, max_size, H, epsilons, max_size_overlap=-1):
    """Sample a random nested hypergraph and return its overlap DAG.

    All arguments are forwarded unchanged to `random_nested_hypergraph`.

    Every DAG node is tagged with a "subset" attribute equal to `len(node)`;
    "subset" is the attribute name `nx.multipartite_layout` reads by default,
    so the DAG can be laid out in layers by node length.

    Returns
    -------
    (dag, hyperedges)
        The annotated overlap DAG and the sampled hyperedges it was built from.
    """
    hyperedges = random_nested_hypergraph(
        N, max_size, H, epsilons, max_size_overlap=max_size_overlap
    )
    # overlap_dag returns three values; only the graph is needed here.
    dag, _, _ = overlap_dag(hyperedges)
    layer_of = {node: len(node) for node in dag.nodes()}
    nx.set_node_attributes(dag, layer_of, name="subset")
    return dag, hyperedges

In [None]:
def get_plot_overlap(N, max_size, H, epsilons, max_size_overlap=-1):
    """Sample a random nested hypergraph and return its overlap graph.

    All arguments are forwarded unchanged to `random_nested_hypergraph`.

    Every node of the graph is tagged with a "subset" attribute equal to
    `len(node)`; "subset" is the attribute name `nx.multipartite_layout`
    reads by default.

    Returns
    -------
    (graph, hyperedges)
        The annotated overlap graph and the sampled hyperedges it was built from.
    """
    hyperedges = random_nested_hypergraph(
        N, max_size, H, epsilons, max_size_overlap=max_size_overlap
    )
    # overlap_graph returns three values; only the graph is needed here.
    graph, _, _ = overlap_graph(hyperedges)
    layer_of = {node: len(node) for node in graph.nodes()}
    nx.set_node_attributes(graph, layer_of, name="subset")
    return graph, hyperedges

In [None]:
# Varying both: 3x3 grid of encapsulation DAGs, rows sweep epsilon_2 and
# columns sweep epsilon_3.
fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(12, 6), squeeze=False)
eps_grid = [1.0, 0.5, 0.0]

for row_idx, ep2 in enumerate(eps_grid):
    for col_idx, ep3 in enumerate(eps_grid):
        ax = axs[row_idx][col_idx]
        epsilons = {2: ep2, 3: ep3}
        dag, _ = get_plot_dag(N, max_size, H, epsilons, max_size_overlap=max_size_overlap)
        ax.set_axis_off()
        multipart_layout = nx.multipartite_layout(dag, align="horizontal")
        nx.draw_networkx(dag, pos=multipart_layout, ax=ax, node_size=40, arrowsize=2,
                         with_labels=False, node_color="dodgerblue", alpha=0.5)
        ax.set(title=fr"$\epsilon_2={ep2}, \epsilon_3={ep3}$")
        ax.title.set_fontsize(15)

        if col_idx != 0:
            continue

        # Left-most column only: label each layer with its hyperedge size.
        # The y-coordinate of a layer is read off the first node of that size.
        y_dict = {}
        for size in range(2, max_size + 1):
            layer_nodes = [node for node in dag.nodes() if len(node) == size]
            y_dict[size] = multipart_layout[layer_nodes[0]][1]
        for size, y_pos in y_dict.items():
            ax.text(-1.55, y_pos, fr"$s={size}$", size=13)
        #if row_idx == 0:
        #    ax.text(-1.8, y_dict[max(y_dict.keys())]+0.05, "Hyperedge Size", size=15)

fig.subplots_adjust(wspace=0.0)
fig.suptitle(r"Encapsulation DAGs for Varying $\epsilon_s$", size=16)
fig.tight_layout()
#fig.savefig("../results/plots/nested-dag-example.pdf")

In [None]:
# Overlap DAG: 3x3 grid (rows sweep epsilon_2, columns sweep epsilon_3), with an
# optional fourth column showing a size-matched random-degree hypergraph.
with_uniform = False
# Only needed for the alternative "colour edges by overlap size" scheme
# mentioned in _draw_overlap_panel.
colors = {1:"#e5d262", 2:"#7de5b8", 3: "#9be95d"}


def _draw_overlap_panel(dag, ax, title):
    """Draw one overlap DAG on `ax` and return the node layout that was used.

    Nodes are placed with `nx.multipartite_layout` (layers taken from the
    "subset" node attribute). Edges whose "weight" is below 1.0 are coloured by
    weight with the Greens colormap; edges whose "weight" equals 1.0 are drawn
    in black on top.
    """
    layout = nx.multipartite_layout(dag, align="horizontal")
    nx.draw_networkx_nodes(dag, layout, node_size=40,
                           node_color="dodgerblue", alpha=0.5, ax=ax)

    # Single pass over the edges, split by weight.
    partial_edges, partial_weights, full_edges = [], [], []
    for u, v, data in dag.edges(data=True):
        weight = data["weight"]
        if weight < 1.0:
            partial_edges.append((u, v))
            partial_weights.append(float(weight))
        elif weight == 1.0:
            full_edges.append((u, v))

    # Alternative colouring: [colors[dag.edges[e]["overlap"]] for e in partial_edges]
    nx.draw_networkx_edges(dag, layout, edgelist=partial_edges,
                           edge_color=np.array(partial_weights), edge_cmap=plt.cm.Greens,
                           arrowsize=2, ax=ax, alpha=0.5)
    nx.draw_networkx_edges(dag, layout, edgelist=full_edges, edge_color="black",
                           arrowsize=2, ax=ax, alpha=0.4)
    ax.set(title=title)
    return layout


n_cols = 4 if with_uniform else 3
fig, axs = plt.subplots(nrows=3, ncols=n_cols, figsize=(12, 6), squeeze=False)

for row_idx, ep2 in enumerate([1.0, 0.5, 0.0]):
    for col_idx, ep3 in enumerate([1.0, 0.5, 0.0]):
        epsilons = {2: ep2, 3: ep3}
        # Use the configured max_size_overlap (same as the other cells) rather
        # than a hard-coded 1.
        dag, hyperedges = get_plot_overdag(N, max_size, H, epsilons,
                                           max_size_overlap=max_size_overlap)
        multipart_layout = _draw_overlap_panel(
            dag, axs[row_idx][col_idx], fr"$\epsilon_2={ep2}, \epsilon_3={ep3}$")
        if col_idx == 0:
            # Label each layer with its hyperedge size, at the y-position of
            # the first node of that size; skip sizes absent from this sample.
            for size in range(2, max_size + 1):
                layer_nodes = [node for node in dag.nodes() if len(node) == size]
                if layer_nodes:
                    axs[row_idx][col_idx].text(-1.55, multipart_layout[layer_nodes[0]][1],
                                               fr"$s={size}$")

if with_uniform:
    # Add uniform random for comparison. The size distribution is taken from
    # the LAST nested hypergraph sampled in the loop above.
    size_dist = Counter(len(he) for he in hyperedges)
    uniform_hyperedges = random_degree_hypergraph(N, size_dist, correlation="uncorrelated")
    dag, nth, he_map = overlap_dag(uniform_hyperedges)
    nx.set_node_attributes(dag, {node: len(node) for node in dag.nodes()}, name="subset")
    _draw_overlap_panel(dag, axs[1][3], "Uniform")

# Hide the frame of every panel, including the unused ones in the fourth
# column when with_uniform is enabled.
for ax in axs.flat:
    ax.set_axis_off()

fig.subplots_adjust(wspace=0.0)
fig.suptitle(r"Overlap Structures for Varying $\epsilon_s$", size=16)
fig.tight_layout()
#fig.savefig("../results/plots/nested-overdag-example.pdf")

# Average DAG edges + overlap size

In [None]:
from collections import defaultdict

In [None]:
# Monte Carlo estimate of how many overlap-DAG edges are full encapsulations
# (weight == 1.0) versus partial overlaps (weight < 1.0) for each
# (epsilon_2, epsilon_3) pair.
# NOTE(review): no random seed is set in the visible cells, so results vary
# between runs; seed whichever RNG random_nested_hypergraph uses if
# reproducibility matters.
eps_vals = [0.0, 0.25, 0.5, 0.75, 1.0]
n_trials = 1000
# dag_edges[ep2][ep3] / overlap_edges[ep2][ep3] -> list with one count per trial.
dag_edges = {ep:defaultdict(list) for ep in eps_vals}
overlap_edges = {ep:defaultdict(list) for ep in eps_vals}

for num in range(n_trials):
    for ep2 in eps_vals:
        for ep3 in eps_vals:
            epsilons = {2: ep2, 3: ep3}
            dag, hyperedges = get_plot_overdag(N, max_size, H, epsilons,
                                               max_size_overlap=max_size_overlap)
            # One pass over the edges; only the counts are needed.
            weights = [data["weight"] for _u, _v, data in dag.edges(data=True)]
            overlap_edges[ep2][ep3].append(sum(1 for w in weights if w < 1.0))
            dag_edges[ep2][ep3].append(sum(1 for w in weights if w == 1.0))

# Mean count per (epsilon_2, epsilon_3) cell; rows index epsilon_2, columns epsilon_3.
overlap_arr = np.zeros((len(eps_vals), len(eps_vals)))
dag_arr = np.zeros((len(eps_vals), len(eps_vals)))
for row_idx, ep2 in enumerate(eps_vals):
    for col_idx, ep3 in enumerate(eps_vals):
        dag_arr[row_idx][col_idx] = np.mean(dag_edges[ep2][ep3])
        overlap_arr[row_idx][col_idx] = np.mean(overlap_edges[ep2][ep3])

In [None]:
# Two stacked heatmaps of the mean edge counts: rows are epsilon_2, columns epsilon_3.
fig, axs = plt.subplots(2, 1, figsize=(4,7), squeeze=False)

tick_positions = list(range(len(eps_vals)))
panels = [
    (dag_arr, "DAG Edges"),
    (overlap_arr, "Overlap Edges (excluding DAG)"),
]

col_idx = 0
for row_idx, (mean_counts, panel_title) in enumerate(panels):
    ax = axs[row_idx][col_idx]
    hm = ax.imshow(mean_counts, origin="lower")
    fig.colorbar(hm, ax=ax, location='right', shrink=0.9)
    ax.set(xticks=tick_positions, yticks=tick_positions,
           xticklabels=eps_vals, yticklabels=eps_vals,
           title=panel_title, xlabel=r"$\epsilon_3$", ylabel=r"$\epsilon_2$")

fig.tight_layout()
#fig.savefig("../results/plots/edge-overlap-heatmaps.pdf")

# Visualizing random degree hypergraphs + distributions

In [None]:
import hypernetx as hnx
from encapsulation_dag import get_overlap_dists

In [None]:
# Hypergraph model parameters
# Parameters for `random_degree_hypergraph` (used via get_connected_hyperedges).
# Note: this reassigns `N`, which was 20 for the nested-hypergraph cells above.
N = 25
# Maps hyperedge size -> number of hyperedges of that size (the same shape as
# the Counter of sizes passed to random_degree_hypergraph elsewhere in this notebook).
size_distribution = {
    5: 4,
    4: 12,
    3: 14,
    2: 16
}
# Forwarded as the `first_N` keyword of random_degree_hypergraph; its meaning is
# defined there -- TODO document once confirmed.
first_N = 8

In [None]:
def get_connected_hyperedges(N, size_distribution, correlation, first_N=0):
    """Resample `random_degree_hypergraph` until the result passes the connectivity check.

    All arguments are forwarded unchanged to `random_degree_hypergraph`
    (`correlation` and `first_N` by keyword). Sampling is repeated, with no
    upper bound on the number of attempts, until
    `check_hyperedges_connectivity` returns a truthy value.

    NOTE(review): `check_hyperedges_connectivity` is not imported in any
    visible cell of this notebook -- confirm where it comes from, otherwise
    this raises NameError on a fresh kernel.

    Returns
    -------
    The first sampled set of hyperedges that passes the check.
    """
    while True:
        hyperedges = random_degree_hypergraph(
            N, size_distribution, correlation=correlation, first_N=first_N
        )
        if check_hyperedges_connectivity(hyperedges):
            return hyperedges

In [None]:
# Uncorrelated random-degree hypergraph: degree histogram (left) and drawing (right).
correlation = "uncorrelated"
hyperedges = get_connected_hyperedges(N, size_distribution, correlation)

# Node labels are converted to strings before being handed to hypernetx.
# Each hyperedge is materialised as a tuple: a generator expression could only
# be iterated once and would be empty on any second pass.
hyperedges_as_strings = [tuple(str(node) for node in he) for he in hyperedges]
hypergraph = {idx: he for idx, he in enumerate(hyperedges_as_strings)}
fig, axs = plt.subplots(1, 2, figsize=(7, 4), squeeze=False)
hnx.draw(hnx.Hypergraph(hypergraph), ax=axs[0][1])

# Node degree = number of hyperedges containing the node. Computed directly
# with Counter because `xgi` is never imported in this notebook; `set(he)`
# ensures a node repeated inside one hyperedge is counted once for it.
node_degrees = Counter(node for he in hyperedges for node in set(he))
axs[0][0].hist(list(node_degrees.values()))
axs[0][0].set(xlabel="Degree", ylabel="# Nodes")

In [None]:
# Positively correlated random-degree hypergraph: degree histogram (left) and drawing (right).
correlation = "positive"
hyperedges = get_connected_hyperedges(N, size_distribution, correlation)

# Node labels are converted to strings before being handed to hypernetx.
# Each hyperedge is materialised as a tuple: a generator expression could only
# be iterated once and would be empty on any second pass.
hyperedges_as_strings = [tuple(str(node) for node in he) for he in hyperedges]
hypergraph = {idx: he for idx, he in enumerate(hyperedges_as_strings)}
fig, axs = plt.subplots(1, 2, figsize=(7, 4), squeeze=False)
hnx.draw(hnx.Hypergraph(hypergraph), ax=axs[0][1])

# Node degree = number of hyperedges containing the node. Computed directly
# with Counter because `xgi` is never imported in this notebook; `set(he)`
# ensures a node repeated inside one hyperedge is counted once for it.
node_degrees = Counter(node for he in hyperedges for node in set(he))
axs[0][0].hist(list(node_degrees.values()))
axs[0][0].set(xlabel="Degree", ylabel="# Nodes")

In [None]:
def get_overlap_plot_data(dag):
    """Turn `get_overlap_dists(dag)` into plottable series, one per outer key.

    `get_overlap_dists(dag)` is indexed as `dists[m][n]` and each entry is
    averaged with `np.mean`. For every outer key `m`, visited in descending
    order, the series covers n = 2 .. m-1; an `n` with no entry keeps the
    value 0.

    Returns
    -------
    (xs, ys, labels)
        Parallel lists: `xs[i]` holds the n values, `ys[i]` the matching
        means, and `labels[i]` the outer key `m` of series i.
    """
    overlap_dists = get_overlap_dists(dag)
    xs, ys, labels = [], [], []

    for m in sorted(overlap_dists, reverse=True):
        per_n = overlap_dists[m]
        n_values = list(range(2, m))
        means = np.zeros(len(n_values))
        for slot, n in enumerate(n_values):
            if n in per_n:
                means[slot] = np.mean(per_n[n])

        xs.append(n_values)
        ys.append(list(means))
        labels.append(m)

    return xs, ys, labels

In [None]:
# One panel per correlation setting; each line is one series returned by
# get_overlap_plot_data for a freshly sampled connected hypergraph.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(7, 4), squeeze=False, sharey=True)
for cidx, correlation in enumerate(["uncorrelated", "positive"]):
    ax = axs[0][cidx]
    hyperedges = get_connected_hyperedges(N, size_distribution, correlation, first_N)
    dag, _, _ = encapsulation_dag(hyperedges)
    xs, ys, labels = get_overlap_plot_data(dag)
    for x_vals, y_vals, series_label in zip(xs, ys, labels):
        ax.plot(x_vals, y_vals, 'o-', label=series_label)
    ax.legend()
    ax.set(
        title=correlation,
        xlabel="n",
        ylabel="Avg # Encapsulated",
        xticks=[2, 3, 4],
    )

In [None]:
# Mean +/- one standard deviation of the weight-1.0 edge count as a function
# of epsilon_3, at fixed epsilon_2.
ep2 = 0.0
x = sorted(dag_edges[ep2].keys())
y = [np.mean(dag_edges[ep2][ep3]) for ep3 in x]
y_std = [np.std(dag_edges[ep2][ep3]) for ep3 in x]

plt.errorbar(x, y, yerr=y_std, marker="o")
plt.xticks(x)
plt.xlabel(r"$\epsilon_3$")
plt.title(fr"$\epsilon_2={ep2}$")

In [None]:
# Mean +/- one standard deviation of the weight<1.0 edge count as a function
# of epsilon_3, at fixed epsilon_2.
ep2 = 1.0
x = sorted(overlap_edges[ep2].keys())
y = [np.mean(overlap_edges[ep2][ep3]) for ep3 in x]
y_std = [np.std(overlap_edges[ep2][ep3]) for ep3 in x]

plt.errorbar(x, y, yerr=y_std, marker="o")
plt.xticks(x)
plt.xlabel(r"$\epsilon_3$")
plt.title(fr"$\epsilon_2={ep2}$")

# In real data

In [None]:
from utils import read_data

In [None]:
# Load the dataset, then keep each distinct hyperedge once, restricted to
# sizes 3 through 6.
hyperedges = read_data("../data/coauth-MAG-Geology-full/coauth-MAG-Geology-full-")
hyperedges = list({tuple(he) for he in hyperedges if 3 <= len(he) <= 6})

In [None]:
# Encapsulation DAG of the real data; every node gets a "subset" attribute
# equal to its length, for use by the layered layout.
dag, nth, he_map = encapsulation_dag(hyperedges)
size_dict = {node: len(node) for node in dag.nodes()}
nx.set_node_attributes(dag, name="subset", values=size_dict)

In [None]:
# Layered node positions for the real-data DAG. Kept in its own cell,
# presumably so the drawing cell can be re-run without recomputing the layout.
multipart_layout = nx.multipartite_layout(dag, align="horizontal")

In [None]:
# Single-panel drawing of the real-data encapsulation DAG using the
# precomputed layered layout.
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(12, 8), squeeze=False)
row_idx = col_idx = 0
ax = axs[row_idx][col_idx]
nx.draw_networkx(dag, pos=multipart_layout, ax=ax, node_size=40,
                 with_labels=False, node_color="dodgerblue", alpha=0.5)