In [1]:
# import warnings
# warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 2

import pickle
import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz
import pandas as pd

import torch

from cell import utils
from cell.utils import link_prediction_performance, edge_overlap
from cell.cell import Cell, EdgeOverlapCriterion, LinkPredictionCriterion
from cell.graph_statistics import compute_graph_statistics

# CORA ML

In [34]:
# Load CORA-ML (presumably adjacency, features, labels -- TODO confirm against
# utils.load_npz) and prepare the observed adjacency matrix.
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/cora_ml.npz')

# Symmetrise, then clamp entries that were doubled by the addition back to 1.
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1

# Restrict the graph to the nodes returned by largest_connected_components.
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc, :][:, lcc]

# Number of nodes that remain.
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [35]:
# Split configuration for CORA-ML: shares handed to the train/val/test split.
val_share = 0.05
test_share = 0.1
seed = 42  # alternative seed noted by the author: 481516234

# Seed the global torch and NumPy RNGs as well, so that a fresh
# "Restart & Run All" starts every stochastic step from the same state.
# Previously `seed` was only handed to the split helper.
torch.manual_seed(seed)
np.random.seed(seed)

In [36]:
# Split the observed graph into train / validation / test parts using the
# shares and seed configured above. The *_zeros arrays are presumably sampled
# non-edges -- TODO confirm against utils.train_val_test_split_adjacency.
(train_ones,
 val_ones, val_zeros,
 test_ones, test_zeros) = utils.train_val_test_split_adjacency(
    _A_obs,
    val_share,
    test_share,
    seed,
    undirected=True,
    connected=True,
    asserts=True,
)

In [37]:
# Assemble the training adjacency matrix from the (row, col) pairs stored in
# `train_ones`. The shape is pinned to (_N, _N) instead of being inferred from
# the largest index, so the matrix always matches `_A_obs` and an index that
# does not belong to the current dataset raises immediately.
train_graph = sp.coo_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=(_N, _N),
).tocsr()

# The training graph must be symmetric. The comparison stays sparse, which
# avoids materialising two dense N x N arrays just for the check.
assert (train_graph != train_graph.T).nnz == 0

### Edge overlap

#### CELL

In [6]:
# Model variant g_type='cell' with H=9. The edge-overlap callback is invoked
# every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='cell',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 200 steps.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-5},
)

Step:  10/200 Loss: 6.03156 Edge-Overlap: 0.039 Total-Time: 4
Step:  20/200 Loss: 4.08835 Edge-Overlap: 0.280 Total-Time: 7
Step:  30/200 Loss: 3.35228 Edge-Overlap: 0.424 Total-Time: 11
Step:  40/200 Loss: 3.03328 Edge-Overlap: 0.496 Total-Time: 15
Step:  50/200 Loss: 2.87984 Edge-Overlap: 0.538 Total-Time: 19


In [7]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9402992789798261, 0.9483153411123437)

In [8]:
# Draw five graphs from the current model and compute statistics for each.
generated_graphs = [model.sample_graph() for _sample in range(5)]
stats = [compute_graph_statistics(graph) for graph in generated_graphs]
# One column per statistic, one row per sampled graph.
stat_df = pd.DataFrame({name: [run[name] for run in stats] for name in stats[0]})

# Start the comparison table with the per-statistic mean as column 'cell'.
df = stat_df.mean().to_frame(name='cell')

#### SVD

In [9]:
# Model variant g_type='svd' with H=9. The edge-overlap callback is invoked
# every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='svd',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 400 steps.
model.train(
    steps=400,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-5},
)

Step:  10/400 Loss: 7.94054 Edge-Overlap: 0.002 Total-Time: 3
Step:  20/400 Loss: 7.70392 Edge-Overlap: 0.005 Total-Time: 7
Step:  30/400 Loss: 6.77800 Edge-Overlap: 0.048 Total-Time: 11
Step:  40/400 Loss: 5.62069 Edge-Overlap: 0.132 Total-Time: 16
Step:  50/400 Loss: 4.81414 Edge-Overlap: 0.184 Total-Time: 20
Step:  60/400 Loss: 4.37184 Edge-Overlap: 0.240 Total-Time: 24
Step:  70/400 Loss: 4.13638 Edge-Overlap: 0.271 Total-Time: 29
Step:  80/400 Loss: 4.01231 Edge-Overlap: 0.287 Total-Time: 33
Step:  90/400 Loss: 3.93262 Edge-Overlap: 0.299 Total-Time: 37
Step: 100/400 Loss: 3.86665 Edge-Overlap: 0.317 Total-Time: 42
Step: 110/400 Loss: 3.83988 Edge-Overlap: 0.326 Total-Time: 46
Step: 120/400 Loss: 3.82225 Edge-Overlap: 0.336 Total-Time: 50
Step: 130/400 Loss: 3.81283 Edge-Overlap: 0.345 Total-Time: 54
Step: 140/400 Loss: 3.82299 Edge-Overlap: 0.350 Total-Time: 58
Step: 150/400 Loss: 3.79810 Edge-Overlap: 0.343 Total-Time: 62
Step: 160/400 Loss: 3.79159 Edge-Overlap: 0.349 Total-Tim

In [10]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9618265008983382, 0.9604507813567715)

In [11]:
# Draw five graphs from the current model and compute statistics for each.
generated_graphs = [model.sample_graph() for _sample in range(5)]
stats = [compute_graph_statistics(graph) for graph in generated_graphs]
# One column per statistic, one row per sampled graph.
stat_df = pd.DataFrame({name: [run[name] for run in stats] for name in stats[0]})

# Add the per-statistic mean as column 'svd' (aligned on the statistic name).
df['svd'] = stat_df.mean()

#### our CELL

In [12]:
# Model variant g_type='fc' ("our CELL") with H=9. The edge-overlap callback
# is invoked every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='fc',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 200 steps; note the larger weight decay here.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-4},
)

Step:  10/200 Loss: 7.40114 Edge-Overlap: 0.015 Total-Time: 4
Step:  20/200 Loss: 7.07412 Edge-Overlap: 0.021 Total-Time: 8
Step:  30/200 Loss: 6.40290 Edge-Overlap: 0.053 Total-Time: 12
Step:  40/200 Loss: 6.09345 Edge-Overlap: 0.056 Total-Time: 17
Step:  50/200 Loss: 4.98667 Edge-Overlap: 0.125 Total-Time: 21
Step:  60/200 Loss: 4.31011 Edge-Overlap: 0.245 Total-Time: 25
Step:  70/200 Loss: 3.76784 Edge-Overlap: 0.292 Total-Time: 30
Step:  80/200 Loss: 3.45488 Edge-Overlap: 0.398 Total-Time: 34
Step:  90/200 Loss: 3.24872 Edge-Overlap: 0.447 Total-Time: 38
Step: 100/200 Loss: 3.16611 Edge-Overlap: 0.496 Total-Time: 42
Step: 110/200 Loss: 3.05939 Edge-Overlap: 0.500 Total-Time: 47


In [13]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9585432980211497, 0.960632798006726)

In [14]:
# Draw five graphs from the current model and compute statistics for each.
generated_graphs = [model.sample_graph() for _sample in range(5)]
stats = [compute_graph_statistics(graph) for graph in generated_graphs]
# One column per statistic, one row per sampled graph.
stat_df = pd.DataFrame({name: [run[name] for run in stats] for name in stats[0]})

# Add the per-statistic mean as column 'cell+fc' (aligned on the statistic name).
df['cell+fc'] = stat_df.mean()

#### Results

In [15]:
# Reference ("ground truth") statistics of the observed graph as column 'gt'.
# Wrapping the dict in a Series lets pandas align every value with the row of
# the same statistic name; the bare `.values()` view was matched purely by
# position and silently relied on identical key order and key sets.
# NOTE(review): this uses the full observed graph `_A_obs`, whereas the models
# above were trained on `train_graph` -- confirm this is the intended reference.
df['gt'] = pd.Series(compute_graph_statistics(_A_obs))

In [16]:
# Display the averaged statistics of the three model variants next to 'gt'.
df

Unnamed: 0,cell,svd,cell+fc,gt
d_max,191.2,220.4,161.6,246.0
d_min,1.0,1.0,1.0,1.0
d,4.827758,4.827758,4.827758,5.680427
LCC,2806.8,2809.6,2798.0,2810.0
wedge_count,79524.2,91137.2,85742.0,137719.0
claw_count,1644038.0,2398984.0,1477212.0,3930163.0
triangle_count,1281.2,1357.2,2152.6,5247.0
square_count,6210.0,6090.0,15156.6,34507.0
power_law_exp,1.813757,1.836949,1.860847,1.767268
gini,0.4446095,0.4649044,0.4866,0.4964733


### Link Prediction Criterion

#### CELL

In [17]:
# Model variant g_type='cell' with H=9. The link-prediction callback is
# invoked every 2 steps on the validation edges, with max_patience=3.
model = Cell(
    A=train_graph,
    g_type='cell',
    H=9,
    callbacks=[
        LinkPredictionCriterion(
            invoke_every=2,
            val_ones=val_ones,
            val_zeros=val_zeros,
            max_patience=3,
        ),
    ],
)

# Train with Adam for up to 300 steps.
model.train(
    steps=300,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-6},
)

Step:   2/300 Loss: 7.92768 ROC-AUC Score: 0.587 Average Precision: 0.595 Total-Time: 0
Step:   4/300 Loss: 7.72621 ROC-AUC Score: 0.674 Average Precision: 0.689 Total-Time: 1
Step:   6/300 Loss: 7.31004 ROC-AUC Score: 0.718 Average Precision: 0.742 Total-Time: 1
Step:   8/300 Loss: 6.70622 ROC-AUC Score: 0.758 Average Precision: 0.787 Total-Time: 2
Step:  10/300 Loss: 6.03905 ROC-AUC Score: 0.798 Average Precision: 0.824 Total-Time: 3
Step:  12/300 Loss: 5.45211 ROC-AUC Score: 0.835 Average Precision: 0.857 Total-Time: 4
Step:  14/300 Loss: 5.00860 ROC-AUC Score: 0.864 Average Precision: 0.885 Total-Time: 4
Step:  16/300 Loss: 4.65858 ROC-AUC Score: 0.881 Average Precision: 0.901 Total-Time: 5
Step:  18/300 Loss: 4.36250 ROC-AUC Score: 0.891 Average Precision: 0.908 Total-Time: 6
Step:  20/300 Loss: 4.11594 ROC-AUC Score: 0.899 Average Precision: 0.913 Total-Time: 6
Step:  22/300 Loss: 3.90864 ROC-AUC Score: 0.907 Average Precision: 0.920 Total-Time: 7
Step:  24/300 Loss: 3.73069 ROC-

In [18]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9428728965023551, 0.9479145240090565)

#### SVD

In [38]:
# Model variant g_type='svd' with H=9. The link-prediction callback is
# invoked every 2 steps on the validation edges, with max_patience=3.
model = Cell(
    A=train_graph,
    g_type='svd',
    H=9,
    callbacks=[
        LinkPredictionCriterion(
            invoke_every=2,
            val_ones=val_ones,
            val_zeros=val_zeros,
            max_patience=3,
        ),
    ],
)

# Train with Adam for up to 300 steps.
model.train(
    steps=300,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-6},
)

Step:   2/300 Loss: 7.94378 ROC-AUC Score: 0.476 Average Precision: 0.487 Total-Time: 0
Step:   4/300 Loss: 7.94337 ROC-AUC Score: 0.487 Average Precision: 0.486 Total-Time: 1
Step:   6/300 Loss: 7.94088 ROC-AUC Score: 0.550 Average Precision: 0.646 Total-Time: 1
Step:   8/300 Loss: 7.93964 ROC-AUC Score: 0.693 Average Precision: 0.764 Total-Time: 2
Step:  10/300 Loss: 7.92941 ROC-AUC Score: 0.808 Average Precision: 0.850 Total-Time: 3
Step:  12/300 Loss: 7.89748 ROC-AUC Score: 0.854 Average Precision: 0.887 Total-Time: 3
Step:  14/300 Loss: 7.82813 ROC-AUC Score: 0.865 Average Precision: 0.893 Total-Time: 4
Step:  16/300 Loss: 7.69850 ROC-AUC Score: 0.872 Average Precision: 0.897 Total-Time: 4
Step:  18/300 Loss: 7.52787 ROC-AUC Score: 0.879 Average Precision: 0.901 Total-Time: 5
Step:  20/300 Loss: 7.36237 ROC-AUC Score: 0.895 Average Precision: 0.914 Total-Time: 6
Step:  22/300 Loss: 7.13812 ROC-AUC Score: 0.914 Average Precision: 0.928 Total-Time: 6
Step:  24/300 Loss: 6.86460 ROC-

In [39]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9509790868122073, 0.9587202585165912)

#### our CELL

In [40]:
# Model variant g_type='fc' ("our CELL") with H=9. The link-prediction
# callback is invoked every 2 steps on the validation edges, max_patience=3.
model = Cell(
    A=train_graph,
    g_type='fc',
    H=9,
    callbacks=[
        LinkPredictionCriterion(
            invoke_every=2,
            val_ones=val_ones,
            val_zeros=val_zeros,
            max_patience=3,
        ),
    ],
)

# Train with Adam for up to 300 steps; weight decay differs from the other
# link-prediction runs on this dataset.
model.train(
    steps=300,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 5e-5},
)

Step:   2/300 Loss: 7.79555 ROC-AUC Score: 0.789 Average Precision: 0.818 Total-Time: 0
Step:   4/300 Loss: 7.50968 ROC-AUC Score: 0.777 Average Precision: 0.777 Total-Time: 1
Step:   6/300 Loss: 7.67894 ROC-AUC Score: 0.777 Average Precision: 0.782 Total-Time: 2
Step:   8/300 Loss: 7.41607 ROC-AUC Score: 0.816 Average Precision: 0.821 Total-Time: 2
Step:  10/300 Loss: 7.31942 ROC-AUC Score: 0.841 Average Precision: 0.858 Total-Time: 3
Step:  12/300 Loss: 7.21433 ROC-AUC Score: 0.846 Average Precision: 0.864 Total-Time: 4
Step:  14/300 Loss: 7.11461 ROC-AUC Score: 0.849 Average Precision: 0.866 Total-Time: 4
Step:  16/300 Loss: 7.00587 ROC-AUC Score: 0.858 Average Precision: 0.874 Total-Time: 5
Step:  18/300 Loss: 6.84312 ROC-AUC Score: 0.858 Average Precision: 0.869 Total-Time: 6
Step:  20/300 Loss: 6.71668 ROC-AUC Score: 0.876 Average Precision: 0.889 Total-Time: 6
Step:  22/300 Loss: 6.49325 ROC-AUC Score: 0.890 Average Precision: 0.904 Total-Time: 7
Step:  24/300 Loss: 6.30148 ROC-

In [41]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9519925564026371, 0.9527348849922528)

# Citeseer

In [88]:
# Load Citeseer (presumably adjacency, features, labels -- TODO confirm against
# utils.load_npz) and prepare the observed adjacency matrix.
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/citeseer.npz')

# Symmetrise, then clamp entries that were doubled by the addition back to 1.
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1

# Remove self-loops: subtract the identity and reset the resulting -1 entries
# (diagonal positions that had no self-loop) to 0.
_A_obs = _A_obs - sp.eye(_A_obs.shape[0], _A_obs.shape[0])
_A_obs[_A_obs < 0] = 0
# The assignment above keeps those diagonal positions as explicitly stored
# zeros. Drop them so that `nnz` and any structure-based code only see real
# edges.
_A_obs.eliminate_zeros()

# Restrict the graph to the nodes returned by largest_connected_components.
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
# Number of nodes that remain.
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [89]:
# Split configuration for Citeseer: shares handed to the train/val/test split.
val_share = 0.05
test_share = 0.1
seed = 48

# Seed the global torch and NumPy RNGs as well, so that a fresh
# "Restart & Run All" starts every stochastic step from the same state.
# Previously `seed` was only handed to the split helper.
torch.manual_seed(seed)
np.random.seed(seed)

In [90]:
# Author's note for this dataset: "there are self loops!" -- the split is run
# with its internal asserts switched off (asserts=False).
(train_ones,
 val_ones, val_zeros,
 test_ones, test_zeros) = utils.train_val_test_split_adjacency(
    _A_obs,
    val_share,
    test_share,
    seed,
    undirected=True,
    connected=True,
    asserts=False,
)

In [91]:
# Assemble the training adjacency matrix from the (row, col) pairs stored in
# `train_ones`. The shape is pinned to (_N, _N) instead of being inferred from
# the largest index, so the matrix always matches `_A_obs` and an index that
# does not belong to the current dataset raises immediately.
train_graph = sp.coo_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=(_N, _N),
).tocsr()

# The training graph must be symmetric. The comparison stays sparse, which
# avoids materialising two dense N x N arrays just for the check.
assert (train_graph != train_graph.T).nnz == 0

#### CELL

In [61]:
# Model variant g_type='cell' with H=9. The edge-overlap callback is invoked
# every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='cell',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 200 steps.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-7},
)

Step:  10/200 Loss: 5.30129 Edge-Overlap: 0.043 Total-Time: 2
Step:  20/200 Loss: 2.98896 Edge-Overlap: 0.348 Total-Time: 5
Step:  30/200 Loss: 2.07016 Edge-Overlap: 0.615 Total-Time: 7


In [62]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.8708131498109641, 0.8990004026174934)

In [63]:
# Draw five graphs from the current model and compute statistics for each.
generated_graphs = [model.sample_graph() for _sample in range(5)]
stats = [compute_graph_statistics(graph) for graph in generated_graphs]
# One column per statistic, one row per sampled graph.
stat_df = pd.DataFrame({name: [run[name] for run in stats] for name in stats[0]})

# Start a fresh comparison table with the per-statistic mean as column 'cell'.
df = stat_df.mean().to_frame(name='cell')

#### SVD

In [64]:
# Model variant g_type='svd' with H=9. The edge-overlap callback is invoked
# every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='svd',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 400 steps.
model.train(
    steps=400,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-7},
)

Step:  10/400 Loss: 7.64390 Edge-Overlap: 0.003 Total-Time: 2
Step:  20/400 Loss: 7.04854 Edge-Overlap: 0.031 Total-Time: 5
Step:  30/400 Loss: 5.32554 Edge-Overlap: 0.126 Total-Time: 8
Step:  40/400 Loss: 3.61633 Edge-Overlap: 0.314 Total-Time: 11
Step:  50/400 Loss: 2.70742 Edge-Overlap: 0.526 Total-Time: 14


In [65]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9079631379962193, 0.9267156559447554)

In [66]:
# Draw five graphs from the current model and compute statistics for each.
generated_graphs = [model.sample_graph() for _sample in range(5)]
stats = [compute_graph_statistics(graph) for graph in generated_graphs]
# One column per statistic, one row per sampled graph.
stat_df = pd.DataFrame({name: [run[name] for run in stats] for name in stats[0]})

# Add the per-statistic mean as column 'svd' (aligned on the statistic name).
df['svd'] = stat_df.mean()

#### our CELL

In [67]:
# Model variant g_type='fc' ("our CELL") with H=9. The edge-overlap callback
# is invoked every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='fc',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 200 steps; this run uses a smaller learning rate
# (0.05) than the other edge-overlap runs on this dataset.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.05, 'weight_decay': 1e-5},
)

Step:  10/200 Loss: 5.95888 Edge-Overlap: 0.036 Total-Time: 2
Step:  20/200 Loss: 3.62476 Edge-Overlap: 0.271 Total-Time: 5
Step:  30/200 Loss: 2.38713 Edge-Overlap: 0.583 Total-Time: 8


In [68]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.8821109995274101, 0.9077585519655101)

In [69]:
# Draw five graphs from the current model and compute statistics for each.
generated_graphs = [model.sample_graph() for _sample in range(5)]
stats = [compute_graph_statistics(graph) for graph in generated_graphs]
# One column per statistic, one row per sampled graph.
stat_df = pd.DataFrame({name: [run[name] for run in stats] for name in stats[0]})

# Add the per-statistic mean as column 'cell+fc' (aligned on the statistic name).
df['cell+fc'] = stat_df.mean()

#### Results

In [70]:
# Reference ("ground truth") statistics of the observed graph as column 'gt'.
# Wrapping the dict in a Series lets pandas align every value with the row of
# the same statistic name; the bare `.values()` view was matched purely by
# position and silently relied on identical key order and key sets.
# NOTE(review): this uses the full observed graph `_A_obs`, whereas the models
# above were trained on `train_graph` -- confirm this is the intended reference.
df['gt'] = pd.Series(compute_graph_statistics(_A_obs))

In [71]:
# Display the averaged statistics of the three model variants next to 'gt'.
df

Unnamed: 0,cell,svd,cell+fc,gt
d_max,54.2,41.4,83.6,99.0
d_min,1.0,1.0,1.0,1.0
d,2.954502,2.954502,2.954502,3.476777
LCC,2070.4,2098.6,2023.8,2110.0
wedge_count,13311.8,12611.2,18030.0,25943.0
claw_count,57036.2,40100.2,153219.4,250348.0
triangle_count,146.6,127.0,221.0,1083.0
square_count,373.6,345.0,1087.6,5977.0
power_law_exp,2.186195,2.180404,2.263491,2.068238
gini,0.375974,0.373173,0.416334,0.42826


In [72]:
# Model variant g_type='cell' with H=9. The link-prediction callback is
# invoked every 2 steps on the validation edges, with max_patience=5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='cell',
    callbacks=[
        LinkPredictionCriterion(
            invoke_every=2,
            val_ones=val_ones,
            val_zeros=val_zeros,
            max_patience=5,
        ),
    ],
)

# Train with Adam for up to 200 steps.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-6},
)

Step:   2/200 Loss: 7.63092 ROC-AUC Score: 0.633 Average Precision: 0.629 Total-Time: 0
Step:   4/200 Loss: 7.36735 ROC-AUC Score: 0.700 Average Precision: 0.711 Total-Time: 1
Step:   6/200 Loss: 6.84293 ROC-AUC Score: 0.724 Average Precision: 0.742 Total-Time: 1
Step:   8/200 Loss: 6.09010 ROC-AUC Score: 0.731 Average Precision: 0.750 Total-Time: 1
Step:  10/200 Loss: 5.26915 ROC-AUC Score: 0.753 Average Precision: 0.774 Total-Time: 2
Step:  12/200 Loss: 4.53589 ROC-AUC Score: 0.788 Average Precision: 0.807 Total-Time: 3
Step:  14/200 Loss: 3.98187 ROC-AUC Score: 0.815 Average Precision: 0.834 Total-Time: 3
Step:  16/200 Loss: 3.57328 ROC-AUC Score: 0.832 Average Precision: 0.850 Total-Time: 4
Step:  18/200 Loss: 3.22611 ROC-AUC Score: 0.840 Average Precision: 0.856 Total-Time: 4
Step:  20/200 Loss: 2.93088 ROC-AUC Score: 0.850 Average Precision: 0.864 Total-Time: 4
Step:  22/200 Loss: 2.68452 ROC-AUC Score: 0.861 Average Precision: 0.875 Total-Time: 5
Step:  24/200 Loss: 2.47382 ROC-

In [73]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.8834106214555764, 0.9093101891993639)

In [74]:
# Model variant g_type='svd' with H=9. The link-prediction callback is
# invoked every 2 steps on the validation edges, with max_patience=5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='svd',
    callbacks=[
        LinkPredictionCriterion(
            invoke_every=2,
            val_ones=val_ones,
            val_zeros=val_zeros,
            max_patience=5,
        ),
    ],
)

# Train with Adam for up to 200 steps.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-6},
)

Step:   2/200 Loss: 7.65593 ROC-AUC Score: 0.561 Average Precision: 0.549 Total-Time: 0
Step:   4/200 Loss: 7.65572 ROC-AUC Score: 0.525 Average Precision: 0.525 Total-Time: 0
Step:   6/200 Loss: 7.65409 ROC-AUC Score: 0.580 Average Precision: 0.652 Total-Time: 1
Step:   8/200 Loss: 7.65061 ROC-AUC Score: 0.678 Average Precision: 0.742 Total-Time: 1
Step:  10/200 Loss: 7.63631 ROC-AUC Score: 0.774 Average Precision: 0.817 Total-Time: 2
Step:  12/200 Loss: 7.59732 ROC-AUC Score: 0.814 Average Precision: 0.846 Total-Time: 2
Step:  14/200 Loss: 7.51184 ROC-AUC Score: 0.846 Average Precision: 0.865 Total-Time: 2
Step:  16/200 Loss: 7.35162 ROC-AUC Score: 0.858 Average Precision: 0.875 Total-Time: 3
Step:  18/200 Loss: 7.13328 ROC-AUC Score: 0.880 Average Precision: 0.893 Total-Time: 3
Step:  20/200 Loss: 6.92330 ROC-AUC Score: 0.892 Average Precision: 0.905 Total-Time: 4
Step:  22/200 Loss: 6.66717 ROC-AUC Score: 0.900 Average Precision: 0.912 Total-Time: 4
Step:  24/200 Loss: 6.34331 ROC-

In [75]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9213064154064272, 0.9388324968020753)

In [92]:
# Model variant g_type='fc' ("our CELL") with H=9. The link-prediction
# callback is invoked every 2 steps on the validation edges, max_patience=5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='fc',
    callbacks=[
        LinkPredictionCriterion(
            invoke_every=2,
            val_ones=val_ones,
            val_zeros=val_zeros,
            max_patience=5,
        ),
    ],
)

# Train with Adam for up to 200 steps; weight decay differs from the other
# link-prediction runs on this dataset.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-4},
)

Step:   2/200 Loss: 7.65439 ROC-AUC Score: 0.780 Average Precision: 0.795 Total-Time: 0
Step:   4/200 Loss: 7.36029 ROC-AUC Score: 0.815 Average Precision: 0.836 Total-Time: 0
Step:   6/200 Loss: 7.10053 ROC-AUC Score: 0.815 Average Precision: 0.841 Total-Time: 1
Step:   8/200 Loss: 6.99184 ROC-AUC Score: 0.807 Average Precision: 0.838 Total-Time: 1
Step:  10/200 Loss: 7.10693 ROC-AUC Score: 0.816 Average Precision: 0.844 Total-Time: 2
Step:  12/200 Loss: 6.86898 ROC-AUC Score: 0.814 Average Precision: 0.841 Total-Time: 2
Step:  14/200 Loss: 6.61680 ROC-AUC Score: 0.805 Average Precision: 0.837 Total-Time: 3
Step:  16/200 Loss: 6.33157 ROC-AUC Score: 0.814 Average Precision: 0.846 Total-Time: 3
Step:  18/200 Loss: 5.93422 ROC-AUC Score: 0.810 Average Precision: 0.846 Total-Time: 4
Step:  20/200 Loss: 5.55091 ROC-AUC Score: 0.789 Average Precision: 0.815 Total-Time: 4
Step:  22/200 Loss: 5.68030 ROC-AUC Score: 0.835 Average Precision: 0.853 Total-Time: 5
Step:  24/200 Loss: 5.16519 ROC-

In [93]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9035178402646502, 0.9137068964808797)

# Polblogs

In [78]:
# Load Polblogs (presumably adjacency, features, labels -- TODO confirm against
# utils.load_npz) and prepare the observed adjacency matrix.
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/polblogs.npz')

# Symmetrise, then clamp entries that were doubled by the addition back to 1.
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1

# Remove self-loops: subtract the identity and reset the resulting -1 entries
# (diagonal positions that had no self-loop) to 0.
_A_obs = _A_obs - sp.eye(_A_obs.shape[0], _A_obs.shape[0])
_A_obs[_A_obs < 0] = 0
# The assignment above keeps those diagonal positions as explicitly stored
# zeros. Drop them so that `nnz` and any structure-based code only see real
# edges.
_A_obs.eliminate_zeros()

# Restrict the graph to the nodes returned by largest_connected_components.
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
# Number of nodes that remain.
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [79]:
# Split configuration for Polblogs: shares handed to the train/val/test split.
val_share = 0.05
test_share = 0.1
seed = 48

# Seed the global torch and NumPy RNGs as well, so that a fresh
# "Restart & Run All" starts every stochastic step from the same state.
# Previously `seed` was only handed to the split helper.
torch.manual_seed(seed)
np.random.seed(seed)

In [80]:
# Split the observed graph into train / validation / test parts using the
# shares and seed configured above; the split's internal asserts are switched
# off (asserts=False) for this dataset.
(train_ones,
 val_ones, val_zeros,
 test_ones, test_zeros) = utils.train_val_test_split_adjacency(
    _A_obs,
    val_share,
    test_share,
    seed,
    undirected=True,
    connected=True,
    asserts=False,
)

In [81]:
# Assemble the training adjacency matrix from the (row, col) pairs stored in
# `train_ones`. The shape is pinned to (_N, _N) instead of being inferred from
# the largest index, so the matrix always matches `_A_obs` and an index that
# does not belong to the current dataset raises immediately.
train_graph = sp.coo_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=(_N, _N),
).tocsr()

# The training graph must be symmetric. The comparison stays sparse, which
# avoids materialising two dense N x N arrays just for the check.
assert (train_graph != train_graph.T).nnz == 0

#### CELL

In [82]:
# Model variant g_type='cell' with H=9. The edge-overlap callback is invoked
# every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='cell',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 200 steps.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-7},
)

Step:  10/200 Loss: 5.89207 Edge-Overlap: 0.277 Total-Time: 1
Step:  20/200 Loss: 5.49372 Edge-Overlap: 0.292 Total-Time: 2
Step:  30/200 Loss: 5.31090 Edge-Overlap: 0.332 Total-Time: 3
Step:  40/200 Loss: 5.21406 Edge-Overlap: 0.349 Total-Time: 5
Step:  50/200 Loss: 5.16100 Edge-Overlap: 0.365 Total-Time: 6
Step:  60/200 Loss: 5.12919 Edge-Overlap: 0.370 Total-Time: 8
Step:  70/200 Loss: 5.10804 Edge-Overlap: 0.378 Total-Time: 9
Step:  80/200 Loss: 5.09290 Edge-Overlap: 0.379 Total-Time: 10
Step:  90/200 Loss: 5.08140 Edge-Overlap: 0.380 Total-Time: 12
Step: 100/200 Loss: 5.07229 Edge-Overlap: 0.380 Total-Time: 13
Step: 110/200 Loss: 5.06476 Edge-Overlap: 0.383 Total-Time: 15
Step: 120/200 Loss: 5.05836 Edge-Overlap: 0.380 Total-Time: 16
Step: 130/200 Loss: 5.05281 Edge-Overlap: 0.379 Total-Time: 18
Step: 140/200 Loss: 5.04800 Edge-Overlap: 0.383 Total-Time: 19
Step: 150/200 Loss: 5.04366 Edge-Overlap: 0.396 Total-Time: 21
Step: 160/200 Loss: 5.03982 Edge-Overlap: 0.392 Total-Time: 22

In [83]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9589632320076715, 0.959173802173185)

#### SVD

In [84]:
# Model variant g_type='svd' with H=9. The edge-overlap callback is invoked
# every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='svd',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 400 steps.
model.train(
    steps=400,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-7},
)

Step:  10/400 Loss: 7.05591 Edge-Overlap: 0.023 Total-Time: 1
Step:  20/400 Loss: 6.23551 Edge-Overlap: 0.177 Total-Time: 3
Step:  30/400 Loss: 5.66191 Edge-Overlap: 0.286 Total-Time: 4
Step:  40/400 Loss: 5.43132 Edge-Overlap: 0.317 Total-Time: 5
Step:  50/400 Loss: 5.32924 Edge-Overlap: 0.339 Total-Time: 7
Step:  60/400 Loss: 5.29189 Edge-Overlap: 0.333 Total-Time: 8
Step:  70/400 Loss: 5.25464 Edge-Overlap: 0.349 Total-Time: 10
Step:  80/400 Loss: 5.25657 Edge-Overlap: 0.343 Total-Time: 11
Step:  90/400 Loss: 5.23931 Edge-Overlap: 0.359 Total-Time: 13
Step: 100/400 Loss: 5.21296 Edge-Overlap: 0.356 Total-Time: 14
Step: 110/400 Loss: 5.22599 Edge-Overlap: 0.364 Total-Time: 16
Step: 120/400 Loss: 5.20059 Edge-Overlap: 0.367 Total-Time: 17
Step: 130/400 Loss: 5.19669 Edge-Overlap: 0.346 Total-Time: 19
Step: 140/400 Loss: 5.21904 Edge-Overlap: 0.359 Total-Time: 20
Step: 150/400 Loss: 5.18779 Edge-Overlap: 0.361 Total-Time: 22
Step: 160/400 Loss: 5.19212 Edge-Overlap: 0.362 Total-Time: 2

In [85]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9588842732345122, 0.9576373593341284)

#### our CELL

In [86]:
# Model variant g_type='fc' ("our CELL") with H=9. The edge-overlap callback
# is invoked every 10 steps with an overlap limit of 0.5.
model = Cell(
    A=train_graph,
    H=9,
    g_type='fc',
    callbacks=[
        EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=0.5),
    ],
)

# Train with Adam for up to 200 steps; note the larger weight decay here.
model.train(
    steps=200,
    optimizer_fn=torch.optim.Adam,
    optimizer_args={'lr': 0.1, 'weight_decay': 1e-5},
)

Step:  10/200 Loss: 6.31043 Edge-Overlap: 0.134 Total-Time: 1
Step:  20/200 Loss: 5.83437 Edge-Overlap: 0.223 Total-Time: 3
Step:  30/200 Loss: 5.67837 Edge-Overlap: 0.241 Total-Time: 4
Step:  40/200 Loss: 5.62469 Edge-Overlap: 0.267 Total-Time: 6
Step:  50/200 Loss: 5.50926 Edge-Overlap: 0.282 Total-Time: 7
Step:  60/200 Loss: 5.41652 Edge-Overlap: 0.287 Total-Time: 9
Step:  70/200 Loss: 5.35707 Edge-Overlap: 0.324 Total-Time: 10
Step:  80/200 Loss: 5.28203 Edge-Overlap: 0.325 Total-Time: 12
Step:  90/200 Loss: 5.25425 Edge-Overlap: 0.346 Total-Time: 13
Step: 100/200 Loss: 5.20928 Edge-Overlap: 0.334 Total-Time: 15
Step: 110/200 Loss: 5.17866 Edge-Overlap: 0.358 Total-Time: 16
Step: 120/200 Loss: 5.16331 Edge-Overlap: 0.365 Total-Time: 18
Step: 130/200 Loss: 5.15793 Edge-Overlap: 0.352 Total-Time: 19
Step: 140/200 Loss: 5.13703 Edge-Overlap: 0.358 Total-Time: 21
Step: 150/200 Loss: 5.15559 Edge-Overlap: 0.361 Total-Time: 22
Step: 160/200 Loss: 5.11515 Edge-Overlap: 0.370 Total-Time: 2

In [87]:
# Score the held-out test edges; the displayed pair is
# (ROC-AUC, average precision).
link_prediction_performance(
    scores_matrix=model._scores_matrix,
    val_ones=test_ones,
    val_zeros=test_zeros,
)

(0.9593751752902626, 0.9585088132467695)