In [33]:
# import warnings
# warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 2

import pickle
import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz

import torch

from cell import utils
from cell.utils import link_prediction_performance
from cell.cell import Cell, EdgeOverlapCriterion, LinkPredictionCriterion
from cell.graph_statistics import compute_graph_statistics

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
#train_graph 

_A_obs, _X_obs, _z_obs = utils.load_npz('../data/cora_ml.npz')
# with open('./data/link_prediction.p', 'rb') as handle:
#     val_ones, val_zeros, test_ones, test_zeros = pickle.load(handle)

In [37]:
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [38]:
train_graph = _A_obs

Edge overlap

In [14]:

# initialize model with EO-criterion
model = Cell(A=train_graph,
             H=9,
             callbacks=[EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=.5)])

In [15]:
# train model 
model.train(steps=200,
            optimizer_fn=torch.optim.Adam,
            optimizer_args={'lr': 0.1,
                            'weight_decay': 1e-7})

Step:  10/200 Loss: 6.11672 Edge-Overlap: 0.044 Total-Time: 3
Step:  20/200 Loss: 4.20829 Edge-Overlap: 0.291 Total-Time: 7
Step:  30/200 Loss: 3.49104 Edge-Overlap: 0.446 Total-Time: 11
Step:  40/200 Loss: 3.16307 Edge-Overlap: 0.540 Total-Time: 15


In [16]:

generated_graph = model.sample_graph()

In [17]:
compute_graph_statistics(generated_graph)

{'d_max': 191.0,
 'd_min': 1.0,
 'd': 5.447746243739566,
 'LCC': 2946,
 'wedge_count': 109583.0,
 'claw_count': 2101324.0,
 'triangle_count': 2547,
 'square_count': 15225.0,
 'power_law_exp': 1.7618571951562532,
 'gini': 0.4648573805897793,
 'rel_edge_distr_entropy': 0.9481511142508072,
 'assortativity': -0.0741220706786816,
 'clustering_coefficient': 0.06972796875427759,
 'cpl': 5.156273311283376}

In [18]:
compute_graph_statistics(train_graph)

{'d_max': 246.0,
 'd_min': 1.0,
 'd': 5.4477463,
 'LCC': 2810,
 'wedge_count': 138083.0,
 'claw_count': 3930549.0,
 'triangle_count': 5308,
 'square_count': 34607.0,
 'power_law_exp': 1.8000197248954521,
 'gini': 0.502809004629355,
 'rel_edge_distr_entropy': 0.9374350325221678,
 'assortativity': -0.07206001964302333,
 'clustering_coefficient': 0.11532194404814496,
 'cpl': 5.271030236946822}

validation criteria

In [40]:
val_share = 0.1
test_share = 0.05
seed = 481516234

In [41]:
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(_A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=True)

In [21]:
# with open('../data/link_prediction.p', 'rb') as handle:
#     val_ones, val_zeros, test_ones, test_zeros = pickle.load(handle)

In [42]:

# initialize model with LP-criterion
#
model = Cell(A=train_graph,
             H=9,
             callbacks=[LinkPredictionCriterion(invoke_every=2,
                                                val_ones=val_ones,
                                            val_zeros=val_zeros,
                                                max_patience=3)])

In [43]:

# train model 
model.train(steps=200,
            optimizer_fn=torch.optim.Adam,
            optimizer_args={'lr': 0.1,
                            'weight_decay': 1e-6})

Step:   2/200 Loss: 7.92807 ROC-AUC Score: 0.874 Average Precision: 0.883 Total-Time: 0
Step:   4/200 Loss: 7.73430 ROC-AUC Score: 0.928 Average Precision: 0.935 Total-Time: 1
Step:   6/200 Loss: 7.32484 ROC-AUC Score: 0.945 Average Precision: 0.949 Total-Time: 1
Step:   8/200 Loss: 6.72738 ROC-AUC Score: 0.958 Average Precision: 0.961 Total-Time: 2
Step:  10/200 Loss: 6.06650 ROC-AUC Score: 0.972 Average Precision: 0.974 Total-Time: 2
Step:  12/200 Loss: 5.48931 ROC-AUC Score: 0.983 Average Precision: 0.985 Total-Time: 3
Step:  14/200 Loss: 5.05906 ROC-AUC Score: 0.990 Average Precision: 0.991 Total-Time: 3
Step:  16/200 Loss: 4.73280 ROC-AUC Score: 0.992 Average Precision: 0.993 Total-Time: 4
Step:  18/200 Loss: 4.45470 ROC-AUC Score: 0.992 Average Precision: 0.993 Total-Time: 4
Step:  20/200 Loss: 4.22970 ROC-AUC Score: 0.993 Average Precision: 0.993 Total-Time: 5
Step:  22/200 Loss: 4.04646 ROC-AUC Score: 0.994 Average Precision: 0.994 Total-Time: 5
Step:  24/200 Loss: 3.89095 ROC-

In [44]:
link_prediction_performance(scores_matrix=model._scores_matrix, val_ones=test_ones, val_zeros=test_zeros)

(0.999975, 0.9999752475247525)

In [55]:
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/citeseer.npz')
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [56]:
# initialize model with EO-criterion
model = Cell(A=_A_obs,
             H=9,
             callbacks=[EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=.5)])

In [57]:
# train model 
model.train(steps=200,
            optimizer_fn=torch.optim.Adam,
            optimizer_args={'lr': 0.1,
                            'weight_decay': 1e-7})

Step:  10/200 Loss: 5.31182 Edge-Overlap: 0.047 Total-Time: 2
Step:  20/200 Loss: 3.15629 Edge-Overlap: 0.351 Total-Time: 4
Step:  30/200 Loss: 2.32542 Edge-Overlap: 0.597 Total-Time: 6


In [58]:
generated_graph = model.sample_graph()

In [59]:
compute_graph_statistics(generated_graph)

{'d_max': 55.0,
 'd_min': 1.0,
 'd': 3.5014218009478673,
 'LCC': 2066,
 'wedge_count': 19756.0,
 'claw_count': 85692.0,
 'triangle_count': 426,
 'square_count': 1505.0,
 'power_law_exp': 2.018716157596158,
 'gini': 0.39795839031912905,
 'rel_edge_distr_entropy': 0.9632865165023955,
 'assortativity': -0.017407390313332816,
 'clustering_coefficient': 0.06468920834176958,
 'cpl': 6.635365658145284}

In [60]:
compute_graph_statistics(_A_obs)

{'d_max': 99.0,
 'd_min': 1.0,
 'd': 3.5014217,
 'LCC': 2110,
 'wedge_count': 26160.0,
 'claw_count': 251101.0,
 'triangle_count': 1083,
 'square_count': 6130.0,
 'power_law_exp': 2.058415965970431,
 'gini': 0.4269812453652264,
 'rel_edge_distr_entropy': 0.9541777601995937,
 'assortativity': 0.007273127853861046,
 'clustering_coefficient': 0.12419724770642201,
 'cpl': 9.310486989858404}

In [66]:
val_share = 0.1
test_share = 0.05
seed = 48151626

In [67]:
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(_A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=False)

In [71]:
# initialize model with LP-criterion
#
model = Cell(A=_A_obs,
             H=9,
             callbacks=[LinkPredictionCriterion(invoke_every=2,
                                                val_ones=val_ones,
                                            val_zeros=val_zeros,
                                                max_patience=3)])

In [72]:
# train model 
model.train(steps=200,
            optimizer_fn=torch.optim.Adam,
            optimizer_args={'lr': 0.1,
                            'weight_decay': 1e-6})

Step:   2/200 Loss: 7.63338 ROC-AUC Score: 0.915 Average Precision: 0.928 Total-Time: 0
Step:   4/200 Loss: 7.38481 ROC-AUC Score: 0.943 Average Precision: 0.953 Total-Time: 0
Step:   6/200 Loss: 6.88181 ROC-AUC Score: 0.947 Average Precision: 0.956 Total-Time: 0
Step:   8/200 Loss: 6.15669 ROC-AUC Score: 0.950 Average Precision: 0.957 Total-Time: 1
Step:  10/200 Loss: 5.36616 ROC-AUC Score: 0.960 Average Precision: 0.964 Total-Time: 1
Step:  12/200 Loss: 4.67934 ROC-AUC Score: 0.973 Average Precision: 0.975 Total-Time: 2
Step:  14/200 Loss: 4.19243 ROC-AUC Score: 0.984 Average Precision: 0.985 Total-Time: 2
Step:  16/200 Loss: 3.83087 ROC-AUC Score: 0.990 Average Precision: 0.991 Total-Time: 2
Step:  18/200 Loss: 3.50107 ROC-AUC Score: 0.994 Average Precision: 0.994 Total-Time: 3
Step:  20/200 Loss: 3.23473 ROC-AUC Score: 0.995 Average Precision: 0.995 Total-Time: 3
Step:  22/200 Loss: 3.00925 ROC-AUC Score: 0.997 Average Precision: 0.997 Total-Time: 3
Step:  24/200 Loss: 2.81438 ROC-

In [73]:
link_prediction_performance(scores_matrix=model._scores_matrix, val_ones=test_ones, val_zeros=test_zeros)

(1.0, 1.0)

In [74]:
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/polblogs.npz')
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [75]:
# initialize model with EO-criterion
model = Cell(A=_A_obs,
             H=9,
             callbacks=[EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=.5)])

In [76]:
# train model 
model.train(steps=200,
            optimizer_fn=torch.optim.Adam,
            optimizer_args={'lr': 0.1,
                            'weight_decay': 1e-7})

Step:  10/200 Loss: 5.85236 Edge-Overlap: 0.322 Total-Time: 1
Step:  20/200 Loss: 5.50769 Edge-Overlap: 0.334 Total-Time: 2
Step:  30/200 Loss: 5.34249 Edge-Overlap: 0.370 Total-Time: 3
Step:  40/200 Loss: 5.26058 Edge-Overlap: 0.391 Total-Time: 4
Step:  50/200 Loss: 5.21466 Edge-Overlap: 0.402 Total-Time: 6
Step:  60/200 Loss: 5.18701 Edge-Overlap: 0.402 Total-Time: 7
Step:  70/200 Loss: 5.16819 Edge-Overlap: 0.405 Total-Time: 8
Step:  80/200 Loss: 5.15462 Edge-Overlap: 0.410 Total-Time: 9
Step:  90/200 Loss: 5.14412 Edge-Overlap: 0.416 Total-Time: 11
Step: 100/200 Loss: 5.13580 Edge-Overlap: 0.417 Total-Time: 12
Step: 110/200 Loss: 5.12906 Edge-Overlap: 0.410 Total-Time: 13
Step: 120/200 Loss: 5.12339 Edge-Overlap: 0.422 Total-Time: 14
Step: 130/200 Loss: 5.11848 Edge-Overlap: 0.424 Total-Time: 15
Step: 140/200 Loss: 5.11413 Edge-Overlap: 0.421 Total-Time: 17
Step: 150/200 Loss: 5.11028 Edge-Overlap: 0.423 Total-Time: 18
Step: 160/200 Loss: 5.10686 Edge-Overlap: 0.425 Total-Time: 19


In [77]:
generated_graph = model.sample_graph()

In [78]:
compute_graph_statistics(generated_graph)

{'d_max': 285.0,
 'd_min': 1.0,
 'd': 27.356792144026187,
 'LCC': 1220,
 'wedge_count': 1204509.0,
 'claw_count': 46968179.0,
 'triangle_count': 70833,
 'square_count': 3306353.0,
 'power_law_exp': 1.4029215956463672,
 'gini': 0.5996028538514901,
 'rel_edge_distr_entropy': 0.9113624629348419,
 'assortativity': -0.24596029072229966,
 'clustering_coefficient': 0.17641960334044826,
 'cpl': 2.6980342688386494}

In [79]:
compute_graph_statistics(_A_obs)

{'d_max': 351.0,
 'd_min': 1.0,
 'd': 27.35761,
 'LCC': 1222,
 'wedge_count': 1341632.0,
 'claw_count': 62803482.666666664,
 'triangle_count': 101043,
 'square_count': 5172122.0,
 'power_law_exp': 1.4142113430224184,
 'gini': 0.6219855773483856,
 'rel_edge_distr_entropy': 0.9026675551611921,
 'assortativity': -0.22127261479881782,
 'clustering_coefficient': 0.22594049635071317,
 'cpl': 2.7375296736998864}

In [80]:
val_share = 0.1
test_share = 0.05
seed = 48151627

In [81]:
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(_A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=False)

In [82]:
# initialize model with LP-criterion
#
model = Cell(A=_A_obs,
             H=9,
             callbacks=[LinkPredictionCriterion(invoke_every=2,
                                                val_ones=val_ones,
                                            val_zeros=val_zeros,
                                                max_patience=3)])

In [83]:
# train model 
model.train(steps=200,
            optimizer_fn=torch.optim.Adam,
            optimizer_args={'lr': 0.1,
                            'weight_decay': 1e-6})

Step:   2/200 Loss: 7.09961 ROC-AUC Score: 0.859 Average Precision: 0.862 Total-Time: 0
Step:   4/200 Loss: 6.92702 ROC-AUC Score: 0.940 Average Precision: 0.934 Total-Time: 0
Step:   6/200 Loss: 6.49180 ROC-AUC Score: 0.946 Average Precision: 0.939 Total-Time: 0
Step:   8/200 Loss: 6.05247 ROC-AUC Score: 0.954 Average Precision: 0.948 Total-Time: 0
Step:  10/200 Loss: 5.89296 ROC-AUC Score: 0.962 Average Precision: 0.957 Total-Time: 1
Step:  12/200 Loss: 5.81134 ROC-AUC Score: 0.967 Average Precision: 0.962 Total-Time: 1
Step:  14/200 Loss: 5.71908 ROC-AUC Score: 0.969 Average Precision: 0.964 Total-Time: 1
Step:  16/200 Loss: 5.64729 ROC-AUC Score: 0.973 Average Precision: 0.968 Total-Time: 1
Step:  18/200 Loss: 5.58388 ROC-AUC Score: 0.976 Average Precision: 0.972 Total-Time: 2
Step:  20/200 Loss: 5.52527 ROC-AUC Score: 0.977 Average Precision: 0.975 Total-Time: 2
Step:  22/200 Loss: 5.47827 ROC-AUC Score: 0.979 Average Precision: 0.976 Total-Time: 2
Step:  24/200 Loss: 5.43891 ROC-

In [84]:
link_prediction_performance(scores_matrix=model._scores_matrix, val_ones=test_ones, val_zeros=test_zeros)

(0.9841029220533595, 0.9817612171309184)