This notebook compares the results of entropic OTC and exact OTC, demonstrating that the cost returned by entropic OTC converges to the cost returned by exact OTC as the solver parameters (`xi`, `sink_iter`) are increased. Two examples are provided to illustrate this convergence.

Example 1

In [1]:
import numpy as np
import networkx as nx
import scipy.special

def generate_connected_networks_with_node_features(seed, num_networks, poisson_lambda, node_means, node_variances, edge_lambda):
    """Sample random connected networks together with Gaussian node features.

    Network sizes are drawn from Poisson(poisson_lambda). Each candidate graph
    is kept only if it is connected; otherwise it is rejected and a new
    candidate is drawn, until ``num_networks`` graphs have been accepted.

    Parameters
    ----------
    seed : int
        Seed passed to ``np.random.seed`` (the global NumPy RNG is reseeded).
    num_networks : int
        Number of connected networks to return.
    poisson_lambda : float
        Mean of the Poisson distribution for the number of nodes.
    node_means, node_variances : sequence of float
        Per-feature mean and variance; one feature column is generated for
        each (mean, variance) pair.
    edge_lambda : float
        Mean of the Poisson draw that gives each node's expected edge count.

    Returns
    -------
    adj_list : list of ndarray
        Symmetric integer (n, n) matrices with zero diagonal. Entry (i, j) is
        the sum of two Bernoulli draws, so it takes values in {0, 1, 2}.
    node_feat_list : list of ndarray
        (n, n_features) feature arrays rounded to 2 decimal places.
    success_rate : float
        Accepted networks divided by total attempts; 0.0 if no attempt was
        made (``num_networks <= 0``).

    Raises
    ------
    ValueError
        If networks are requested but ``poisson_lambda`` is not positive, in
        which case no non-empty graph could ever be drawn.
    """
    # Set a random seed for reproducibility
    np.random.seed(seed)

    if num_networks > 0 and poisson_lambda <= 0:
        raise ValueError("poisson_lambda must be positive to generate non-empty networks")

    adj_list = []
    node_feat_list = []

    # Counts every candidate drawn, accepted or rejected
    total_attempts = 0

    def generate_connected_adjacency_matrix(n, edge_lambda):
        """Sample one candidate adjacency matrix (connectivity is checked by the caller)."""
        # Sample the expected number of edges for each node
        expected_edges = np.random.poisson(edge_lambda, size=n)

        # Per-node edge probability. max(n - 1, 1) avoids dividing by zero for a
        # single-node graph, and clipping keeps the value a valid probability
        # when the Poisson draw exceeds n - 1 (binomial would raise otherwise).
        p = np.clip(expected_edges / max(n - 1, 1), 0.0, 1.0)

        A = np.zeros((n, n), dtype=int)

        # Each unordered pair gets one draw per direction; the order of the two
        # draws is kept fixed so results for a given seed stay reproducible.
        for i in range(n):
            for j in range(i + 1, n):
                A[i, j] = np.random.binomial(1, p[i])
                A[j, i] = np.random.binomial(1, p[j])

        # Final adjacency matrix where adj[i, j] = A[i, j] + A[j, i]
        adj = A + A.T

        # Ensure no self-loops
        np.fill_diagonal(adj, 0)

        return adj

    while len(adj_list) < num_networks:
        total_attempts += 1

        # Sample the number of nodes
        n = np.random.poisson(poisson_lambda)

        if n == 0:
            # Connectivity is undefined for the empty graph (networkx raises),
            # so treat an empty draw as a rejected attempt.
            continue

        adj = generate_connected_adjacency_matrix(n, edge_lambda)

        # Keep the candidate only if the graph is connected
        G = nx.from_numpy_array(adj)
        if nx.is_connected(G):
            adj_list.append(adj)

            # One normally distributed column per (mean, variance) pair
            node_feat = np.column_stack([
                np.random.normal(mean, np.sqrt(var), n)
                for mean, var in zip(node_means, node_variances)
            ])

            # Round the elements in node_feat to 2 decimal places
            node_feat = np.round(node_feat, 2)

            node_feat_list.append(node_feat)

    # Guard against 0 / 0 when no network was requested
    success_rate = len(adj_list) / total_attempts if total_attempts else 0.0

    return adj_list, node_feat_list, success_rate

def graph_convolution(adj_list, node_feat_list):
    """Apply one symmetric-normalised graph-convolution step and mean-pool each graph.

    For every (adj, X) pair this computes ``D^{-1/2} (adj + I) D^{-1/2} X``,
    where ``D`` is the diagonal matrix of row sums of ``adj + I``, and then
    averages the result over nodes (axis 0).

    Parameters
    ----------
    adj_list : sequence of ndarray
        Square (n, n) adjacency matrices.
    node_feat_list : sequence of array-like
        Node features, one entry per adjacency matrix, with n rows each.

    Returns
    -------
    list
        One pooled value per graph: the mean over nodes of the convolved
        features.
    """
    conv_results = []

    for adj, X in zip(adj_list, node_feat_list):
        # Add self-loops
        adj_hat = adj + np.eye(adj.shape[0])

        # D^{-1/2} as a vector: the degree matrix is diagonal, so an elementwise
        # reciprocal square root replaces building and inverting a dense matrix.
        # For non-negative adj every row sum is >= 1 thanks to the self-loop.
        inv_sqrt_deg = 1.0 / np.sqrt(adj_hat.sum(axis=1))

        # Row- and column-scale adj_hat: D^{-1/2} adj_hat D^{-1/2}
        norm_adj = inv_sqrt_deg[:, None] * adj_hat * inv_sqrt_deg[None, :]

        conv_results.append((norm_adj @ X).mean(axis=0))

    return conv_results

def generate_graph_discrete_labels(adj_list, node_feat_list, coeffs, error_variance, seed):
    """Draw one binary label per graph from a noisy logistic model.

    Each graph's pooled convolution output is combined linearly with
    ``coeffs``, Gaussian noise with variance ``error_variance`` is added, the
    sum is passed through the logistic function, and a Bernoulli label is drawn
    with that probability. The global NumPy RNG is reseeded with ``seed``.

    Returns a list with one 0/1 label per graph.
    """
    np.random.seed(seed)

    labels = []
    for pooled in graph_convolution(adj_list, node_feat_list):
        # Noise is drawn before the Bernoulli label, once per graph
        noisy_score = np.dot(pooled, coeffs) + np.random.normal(0, np.sqrt(error_variance))
        labels.append(np.random.binomial(1, scipy.special.expit(noisy_score)))

    return labels

def generate_graph_continuous_labels(adj_list, node_feat_list, coeffs, error_variance, seed):
    """Compute one real-valued label per graph from a noisy linear model.

    Each graph's pooled convolution output is combined linearly with
    ``coeffs`` and Gaussian noise with variance ``error_variance`` is added.
    The global NumPy RNG is reseeded with ``seed``.

    Returns a list with one label per graph.
    """
    np.random.seed(seed)

    # One noise draw per graph, in graph order
    return [
        np.dot(pooled, coeffs) + np.random.normal(0, np.sqrt(error_variance))
        for pooled in graph_convolution(adj_list, node_feat_list)
    ]

# Example usage: simulation settings
seed = 921
num_networks = 100
poisson_lambda = 35           # Mean of the Poisson draw for the number of nodes
edge_lambda = 3               # Expected number of edges for each node
node_means = [9, 7, 10]       # Means for each feature column
node_variances = [16, 9, 25]  # Variances for each feature column

adj_list, node_feat_list, success_rate = generate_connected_networks_with_node_features(
    seed=seed,
    num_networks=num_networks,
    poisson_lambda=poisson_lambda,
    node_means=node_means,
    node_variances=node_variances,
    edge_lambda=edge_lambda,
)

# Coefficients for the linear combination of conv_results, and the noise variance
coeffs = [4, -3, 0]
error_variance = 2.5

# Continuous graph labels, rounded to 2 decimal places
graph_labels = np.round(
    generate_graph_continuous_labels(adj_list, node_feat_list, coeffs, error_variance, seed),
    2,
)

# Output the number of generated networks, the success rate, and graph labels
print(f"Generated {len(adj_list)} connected networks.")
print(f"Success rate: {success_rate:.2%}")
print(f"Graph labels: {graph_labels}")

Generated 100 connected networks.
Success rate: 80.00%
Graph labels: [11.85 16.05 17.85 18.5  14.37 15.4  14.36 18.59 18.23 16.55  8.05 12.
 11.44 14.46 11.91 16.61 20.83 12.14 15.37 13.11 18.56 19.06  9.51  6.31
 12.39 14.08 20.44 15.8  15.77 16.93 15.91  9.63 16.73 11.48 17.99  9.74
 17.23 15.63 13.39 17.48 15.93 11.53 19.55 10.79 10.59 12.69  6.41 22.56
 14.11  8.9  11.92 10.26 15.42 11.96 18.64 13.33 14.07 11.12 19.68 22.2
 11.76 13.47 14.42 23.14 11.94 17.48 16.33 12.02 12.   20.8  12.6  15.8
 16.8  15.25 13.27 24.46 26.92 10.78 16.4  12.3  12.7  13.93 18.33 17.76
 13.83 14.18 12.46 10.98 11.39 11.3  16.08 11.76 18.15 14.05 19.41 22.02
 11.41 17.2  12.87 14.83]


In [2]:
from entropic_otc import entropic_otc
from exact_otc import exact_otc
import utils

In [3]:
# One utils.adj_to_trans1 result per generated network
# (presumably a transition matrix derived from the adjacency matrix; see utils)
P_list = [utils.adj_to_trans1(adj) for adj in adj_list]

# Split the node features into one list per feature column (columns 0, 1, 2)
feat1_list, feat2_list, feat3_list = (
    [feat[:, col] for feat in node_feat_list] for col in range(3)
)

- L = 25, T = 50, xi = 0.1, sink_iter = 10

In [16]:
# `time` is imported in this cell because the timing cells further down use it too.
import time

# Cost between the first feature column of networks 3 and 6
cost = utils.get_sq_cost(feat1_list[3], feat1_list[6])

# Entropic OTC with get_sd = False.
# perf_counter() is a monotonic, high-resolution clock, so the measured interval
# cannot be distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
result_e1, P_e1, stat_dist_e1 = entropic_otc(P_list[3], P_list[6], cost, get_sd = False, L = 25, T = 50, xi = 0.1, sink_iter = 10)
end_time = time.perf_counter()
print(f"Elapsed time: {end_time - start_time} seconds")

# Entropic OTC with get_sd = True
start_time = time.perf_counter()
result_e2, P_e2, stat_dist_e2 = entropic_otc(P_list[3], P_list[6], cost, get_sd = True, L = 25, T = 50, xi = 0.1, sink_iter = 10)
end_time = time.perf_counter()
print(f"Elapsed time: {end_time - start_time} seconds")

# Exact OTC on the same inputs, for reference
start_time = time.perf_counter()
result_n, P_n, stat_dist_n = exact_otc(P_list[3], P_list[6], cost)
end_time = time.perf_counter()
print(f"Elapsed time: {end_time - start_time} seconds")

# Entropic (both variants) vs exact cost
result_e1, result_e2, result_n

Elapsed time: 2.8563480377197266 seconds
Elapsed time: 4.662265777587891 seconds
Elapsed time: 3.7100069522857666 seconds


(17.024085467032634, 17.023989168598455, 10.850993850826315)

- L = 25, T = 50, xi = 1, sink_iter = 100

In [None]:
# Entropic OTC on networks 3 and 6 with xi = 1 and sink_iter = 100.
# Relies on `time` and `cost` from the earlier timing cell.
# perf_counter() is monotonic, so it is the appropriate clock for measuring intervals.
start_time = time.perf_counter()
result, P, stat_dist = entropic_otc(P_list[3], P_list[6], cost, get_sd = True, L = 25, T = 50, xi = 1, sink_iter = 100)
end_time = time.perf_counter()
print(f"Elapsed time: {end_time - start_time} seconds")
print(result)

Elapsed time: 11.05821704864502 seconds
13.658963944342535


- L = 25, T = 50, xi = 1, sink_iter = 1000

In [9]:
# Entropic OTC on networks 3 and 6 with xi = 1 and sink_iter = 1000.
# Relies on `time` and `cost` from the earlier timing cell.
# perf_counter() is monotonic, so it is the appropriate clock for measuring intervals.
start_time = time.perf_counter()
result, P, stat_dist = entropic_otc(P_list[3], P_list[6], cost, get_sd = True, L = 25, T = 50, xi = 1, sink_iter = 1000)
end_time = time.perf_counter()
print(f"Elapsed time: {end_time - start_time} seconds")
print(result)

Elapsed time: 85.70026803016663 seconds
10.955095752170038


Example 2

In [11]:
n = 15  # number of rim nodes; node 0 is the hub

# A1: wheel graph on n + 1 nodes. The lower-triangular pattern is filled first
# and then symmetrised.
A1 = np.zeros((n + 1, n + 1), dtype=int)
A1[1:, 0] = 1                                 # spokes: every rim node -- hub
A1[np.arange(2, n + 1), np.arange(1, n)] = 1  # rim edges: k -- k-1 for k = 2..n
A1[n, 1] = 1                                  # close the rim: n -- 1
A1 = A1 + A1.T

# A2: A1 with the rim edge between nodes 1 and 2 removed
A2 = A1.copy()
A2[[1, 2], [2, 1]] = 0

# A3: A1 with the spoke between the hub and node 1 removed
A3 = A1.copy()
A3[[0, 1], [1, 0]] = 0

P1, P2, P3 = (utils.adj_to_trans1(adj) for adj in (A1, A2, A3))

# Cost matrices from utils.get_degree_cost for the pairs (A1, A2) and (A1, A3)
c12 = utils.get_degree_cost(A1, A2)
c13 = utils.get_degree_cost(A1, A3)

In [12]:
# Exact OTC for the pairs (P1, P2) and (P1, P3)
result_12, P12, stat_dist_12 = exact_otc(P1, P2, c12)
result_13, P13, stat_dist_13 = exact_otc(P1, P3, c13)

# Show both exact costs side by side
result_12, result_13

(2.5517241379310405, 2.6551724137931036)

- L = 25, T = 50, xi = 0.1, sink_iter = 10

In [15]:
# Entropic OTC for the same two pairs, with xi = 0.1 and sink_iter = 10
result_e12, Pe12, stat_dist_e12 = entropic_otc(P1, P2, c12, get_sd = True, L = 25, T = 50, xi = 0.1, sink_iter = 10)
result_e13, Pe13, stat_dist_e13 = entropic_otc(P1, P3, c13, get_sd = True, L = 25, T = 50, xi = 0.1, sink_iter = 10)

# Show both entropic costs side by side
result_e12, result_e13

(2.553874249034113, 2.655272554155938)