In [1]:
# import warnings
# warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 2

import pickle
import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz
import pandas as pd

import torch

from cell import utils
from cell.utils import link_prediction_performance, edge_overlap
from cell.cell import Cell, EdgeOverlapCriterion, LinkPredictionCriterion
from cell.graph_statistics import compute_graph_statistics

In [2]:
def run(g_type='cell', loss_fn=None, criterion='eo', nsteps=200, nsamples=5, optimizer_args=None, collect_stat=True):
    """Train a `Cell` model on the notebook's current split and evaluate it.

    The data is not passed in: the function reads the notebook globals
    `train_graph`, `val_ones`, `val_zeros`, `test_ones` and `test_zeros`, so the
    split cell of the dataset of interest must have been executed first.

    Args:
        g_type: Forwarded to `Cell(g_type=...)`.
        loss_fn: Forwarded to `Cell(loss_fn=...)`.
        criterion: Selects the training callback. 'eo' builds
            `EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=.5)`;
            'val' builds `LinkPredictionCriterion(invoke_every=2, max_patience=5)`
            on the global validation pairs.
        nsteps: Passed to `model.train` as `steps`.
        nsamples: Number of graphs drawn with `model.sample_graph()` when
            `collect_stat` is true.
        optimizer_args: Passed to `model.train` as `optimizer_args` together
            with `optimizer_fn=torch.optim.Adam`. Defaults to
            `{'lr': 0.1, 'weight_decay': 1e-5}`.
        collect_stat: Whether to sample graphs and compute their statistics.

    Returns:
        `(stats, generated_graphs)` when `collect_stat` is true, where `stats`
        is a pandas Series with the mean of every graph statistic over the
        sampled graphs followed by the entries 'roc-auc' and 'avg-prec'.
        `None` when `collect_stat` is false.

    Raises:
        NameError: If `criterion` is neither 'eo' nor 'val' (type kept for
            backward compatibility).
        ValueError: If `collect_stat` is true and `nsamples` is smaller than 1.
    """
    # Fail before the slow training run instead of crashing on `stats[0]` afterwards.
    if collect_stat and nsamples < 1:
        raise ValueError(f'nsamples must be >= 1 when collect_stat=True, got {nsamples!r}')

    if criterion == 'eo':
        callbacks = [EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=.5)]
    elif criterion == 'val':
        callbacks = [LinkPredictionCriterion(invoke_every=2,
                                             val_ones=val_ones,
                                             val_zeros=val_zeros,
                                             max_patience=5)]
    else:
        raise NameError(f"unknown criterion {criterion!r}; expected 'eo' or 'val'")

    model = Cell(A=train_graph,
                 H=9,
                 g_type=g_type,
                 loss_fn=loss_fn,
                 callbacks=callbacks)

    # train model
    if optimizer_args is None:
        optimizer_args = {'lr': 0.1, 'weight_decay': 1e-5}
    model.train(steps=nsteps,
                optimizer_fn=torch.optim.Adam,
                optimizer_args=optimizer_args)

    # Score the learned scores matrix on the held-out test pairs.
    roc_auc, avg_prec = link_prediction_performance(scores_matrix=model._scores_matrix,
                                                    val_ones=test_ones,
                                                    val_zeros=test_zeros)
    print(f'ROC-AUC: {roc_auc}, AVG_PREC: {avg_prec}')

    if not collect_stat:
        return None

    generated_graphs = [model.sample_graph() for _ in range(nsamples)]
    stats = [compute_graph_statistics(gg) for gg in generated_graphs]
    # One column per statistic, one row per sampled graph; key order of the
    # first result defines the row order of the returned Series.
    per_graph = pd.DataFrame({k: [s[k] for s in stats] for k in stats[0].keys()})
    stat_series = per_graph.mean()
    stat_series['roc-auc'] = roc_auc
    stat_series['avg-prec'] = avg_prec

    return stat_series, generated_graphs

# CORA ML

In [3]:
# Load the Cora-ML graph. Only the adjacency matrix `_A_obs` is used by the
# cells visible below; `_X_obs` and `_z_obs` are loaded but not referenced there.
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/cora_ml.npz')
# Make the adjacency symmetric, then cap entries that became larger than 1.
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
# Keep only the rows/columns selected by utils.largest_connected_components.
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
# Number of nodes that remain after the restriction.
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [4]:
val_share = 0.05
test_share = 0.1
seed = 42 #481516234

# Split the observed graph into training pairs and validation/test 'ones' and
# 'zeros' (presumably held-out edges and sampled non-edges -- see utils).
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(
    _A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=True)

# Build the training adjacency from the (row, col) pairs in train_ones. The shape
# is passed explicitly: without it coo_matrix infers the size from the largest
# index present, which would shrink the matrix if the last nodes had no training pair.
train_graph = sp.coo_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=_A_obs.shape,
).tocsr()
# Symmetry is checked on the sparse matrices to avoid building two dense N x N arrays.
assert (train_graph != train_graph.T).nnz == 0

In [5]:
# Empty results table; the experiment cells below add one column per method.
df = pd.DataFrame()

### Edge overlap

#### CELL

In [6]:
# Train with g_type='cell' under run()'s default criterion ('eo', edge overlap).
method = 'cell'
options = {'g_type': 'cell'}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/200 Loss: 6.01586 Edge-Overlap: 0.038 Total-Time: 4
Step:  20/200 Loss: 4.05325 Edge-Overlap: 0.276 Total-Time: 8
Step:  30/200 Loss: 3.32439 Edge-Overlap: 0.426 Total-Time: 12
Step:  40/200 Loss: 3.01242 Edge-Overlap: 0.502 Total-Time: 16
ROC-AUC: 0.9457973906682476, AVG_PREC: 0.9525788314348327


#### SVD

In [7]:
# Train with g_type='svd' and nsteps=400 (run()'s default is 200), edge-overlap criterion.
method = 'svd'
options = {'g_type': 'svd', 'nsteps': 400}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/400 Loss: 7.94193 Edge-Overlap: 0.002 Total-Time: 3
Step:  20/400 Loss: 7.92814 Edge-Overlap: 0.002 Total-Time: 8
Step:  30/400 Loss: 7.47896 Edge-Overlap: 0.020 Total-Time: 13
Step:  40/400 Loss: 6.79170 Edge-Overlap: 0.035 Total-Time: 17
Step:  50/400 Loss: 6.00597 Edge-Overlap: 0.097 Total-Time: 21
Step:  60/400 Loss: 5.20309 Edge-Overlap: 0.146 Total-Time: 25
Step:  70/400 Loss: 4.74022 Edge-Overlap: 0.174 Total-Time: 30
Step:  80/400 Loss: 4.49785 Edge-Overlap: 0.209 Total-Time: 34
Step:  90/400 Loss: 4.35357 Edge-Overlap: 0.221 Total-Time: 38
Step: 100/400 Loss: 4.27166 Edge-Overlap: 0.236 Total-Time: 42
Step: 110/400 Loss: 4.22694 Edge-Overlap: 0.249 Total-Time: 46
Step: 120/400 Loss: 4.19445 Edge-Overlap: 0.247 Total-Time: 52
Step: 130/400 Loss: 4.20267 Edge-Overlap: 0.251 Total-Time: 56
Step: 140/400 Loss: 4.18658 Edge-Overlap: 0.258 Total-Time: 61
Step: 150/400 Loss: 4.21205 Edge-Overlap: 0.247 Total-Time: 66
Step: 160/400 Loss: 4.19877 Edge-Overlap: 0.255 Total-Tim

#### our CELL

In [8]:
# Train with g_type='fc' and a custom weight decay, edge-overlap criterion.
method = 'nonlin cell'
optimizer_args = {'lr': 0.1, 'weight_decay': 1e-4}
options = {'g_type': 'fc', 'optimizer_args': optimizer_args}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/200 Loss: 7.26073 Edge-Overlap: 0.023 Total-Time: 4
Step:  20/200 Loss: 6.58085 Edge-Overlap: 0.049 Total-Time: 9
Step:  30/200 Loss: 5.71365 Edge-Overlap: 0.078 Total-Time: 14
Step:  40/200 Loss: 4.68742 Edge-Overlap: 0.159 Total-Time: 19
Step:  50/200 Loss: 4.06416 Edge-Overlap: 0.255 Total-Time: 24
Step:  60/200 Loss: 3.64424 Edge-Overlap: 0.343 Total-Time: 29
Step:  70/200 Loss: 3.37734 Edge-Overlap: 0.409 Total-Time: 33
Step:  80/200 Loss: 3.18726 Edge-Overlap: 0.445 Total-Time: 38
Step:  90/200 Loss: 3.01348 Edge-Overlap: 0.519 Total-Time: 42
ROC-AUC: 0.9547604092098854, AVG_PREC: 0.9581684006198069


#### Results

In [9]:
# Reference column: statistics of the observed graph `_A_obs`. The two trailing 1s
# fill the 'roc-auc' and 'avg-prec' rows that run() appends after the graph
# statistics (presumably standing for a perfect score -- confirm). The assignment
# is positional, so the statistic order must match the existing rows of df.
df['gt'] = [*compute_graph_statistics(_A_obs).values(), 1, 1]

In [10]:
# Display the per-method statistics next to the 'gt' column computed from `_A_obs`.
df

Unnamed: 0,cell,svd,nonlin cell,gt
d_max,184.6,214.0,196.4,246.0
d_min,1.0,1.0,1.0,1.0
d,4.827758,4.827758,4.827758,5.680427
LCC,2807.6,2810.0,2799.8,2810.0
wedge_count,76484.6,84570.0,95000.8,137719.0
claw_count,1486932.0,2107483.0,2176951.0,3930163.0
triangle_count,1206.4,1339.8,2036.8,5247.0
square_count,5472.6,5458.4,14354.4,34507.0
power_law_exp,1.806878,1.825743,1.850737,1.767268
gini,0.4377946,0.4546647,0.4782584,0.4964733


### Link Prediction Criterion

#### CELL

In [11]:
# criterion='val' makes run() use LinkPredictionCriterion on the validation pairs.
method = 'cell'
options = {'g_type': 'cell', 'criterion': 'val', 'collect_stat': False}

# With collect_stat=False run() only prints the test scores and returns None,
# so there is no statistics column to add to df.
run(**options)

Step:   2/200 Loss: 7.92702 ROC-AUC Score: 0.602 Average Precision: 0.626 Total-Time: 0
Step:   4/200 Loss: 7.71740 ROC-AUC Score: 0.691 Average Precision: 0.710 Total-Time: 1
Step:   6/200 Loss: 7.27345 ROC-AUC Score: 0.740 Average Precision: 0.756 Total-Time: 2
Step:   8/200 Loss: 6.62059 ROC-AUC Score: 0.787 Average Precision: 0.799 Total-Time: 3
Step:  10/200 Loss: 5.89705 ROC-AUC Score: 0.833 Average Precision: 0.845 Total-Time: 4
Step:  12/200 Loss: 5.27090 ROC-AUC Score: 0.869 Average Precision: 0.883 Total-Time: 5
Step:  14/200 Loss: 4.81201 ROC-AUC Score: 0.888 Average Precision: 0.903 Total-Time: 6
Step:  16/200 Loss: 4.47060 ROC-AUC Score: 0.899 Average Precision: 0.913 Total-Time: 6
Step:  18/200 Loss: 4.19335 ROC-AUC Score: 0.907 Average Precision: 0.918 Total-Time: 9
Step:  20/200 Loss: 3.96231 ROC-AUC Score: 0.915 Average Precision: 0.924 Total-Time: 10
Step:  22/200 Loss: 3.77934 ROC-AUC Score: 0.921 Average Precision: 0.930 Total-Time: 11
Step:  24/200 Loss: 3.62857 RO

#### SVD

In [12]:
# g_type='svd' with criterion='val' (LinkPredictionCriterion) and nsteps=400.
method = 'svd'
options = {'g_type': 'svd', 'criterion': 'val', 'nsteps': 400, 'collect_stat': False}

# With collect_stat=False run() only prints the test scores and returns None,
# so there is no statistics column to add to df.
run(**options)

Step:   2/400 Loss: 7.94520 ROC-AUC Score: 0.498 Average Precision: 0.505 Total-Time: 0
Step:   4/400 Loss: 7.94193 ROC-AUC Score: 0.500 Average Precision: 0.530 Total-Time: 1
Step:   6/400 Loss: 7.94166 ROC-AUC Score: 0.511 Average Precision: 0.536 Total-Time: 2
Step:   8/400 Loss: 7.94090 ROC-AUC Score: 0.579 Average Precision: 0.625 Total-Time: 3
Step:  10/400 Loss: 7.94088 ROC-AUC Score: 0.644 Average Precision: 0.711 Total-Time: 4
Step:  12/400 Loss: 7.93385 ROC-AUC Score: 0.723 Average Precision: 0.781 Total-Time: 4
Step:  14/400 Loss: 7.91625 ROC-AUC Score: 0.778 Average Precision: 0.823 Total-Time: 5
Step:  16/400 Loss: 7.87864 ROC-AUC Score: 0.805 Average Precision: 0.843 Total-Time: 6
Step:  18/400 Loss: 7.80791 ROC-AUC Score: 0.816 Average Precision: 0.845 Total-Time: 7
Step:  20/400 Loss: 7.69928 ROC-AUC Score: 0.834 Average Precision: 0.858 Total-Time: 8
Step:  22/400 Loss: 7.58459 ROC-AUC Score: 0.856 Average Precision: 0.874 Total-Time: 9
Step:  24/400 Loss: 7.46546 ROC-

#### our CELL

In [13]:
# g_type='fc' with criterion='val' (LinkPredictionCriterion) and a custom weight decay.
method = 'nonlin cell'
optimizer_args = {'lr': 0.1, 'weight_decay': 5e-5}
options = {
    'g_type': 'fc',
    'criterion': 'val',
    'optimizer_args': optimizer_args,
    'collect_stat': False,
}

# With collect_stat=False run() only prints the test scores and returns None,
# so there is no statistics column to add to df.
run(**options)

Step:   2/200 Loss: 7.90419 ROC-AUC Score: 0.822 Average Precision: 0.840 Total-Time: 1
Step:   4/200 Loss: 7.57049 ROC-AUC Score: 0.827 Average Precision: 0.845 Total-Time: 1
Step:   6/200 Loss: 7.35356 ROC-AUC Score: 0.844 Average Precision: 0.856 Total-Time: 2
Step:   8/200 Loss: 7.31659 ROC-AUC Score: 0.843 Average Precision: 0.865 Total-Time: 3
Step:  10/200 Loss: 7.24757 ROC-AUC Score: 0.827 Average Precision: 0.854 Total-Time: 5
Step:  12/200 Loss: 7.19946 ROC-AUC Score: 0.844 Average Precision: 0.867 Total-Time: 6
Step:  14/200 Loss: 7.10661 ROC-AUC Score: 0.849 Average Precision: 0.871 Total-Time: 6
Step:  16/200 Loss: 6.93392 ROC-AUC Score: 0.855 Average Precision: 0.871 Total-Time: 7
Step:  18/200 Loss: 6.74417 ROC-AUC Score: 0.876 Average Precision: 0.891 Total-Time: 8
Step:  20/200 Loss: 6.53212 ROC-AUC Score: 0.891 Average Precision: 0.901 Total-Time: 10
Step:  22/200 Loss: 6.32547 ROC-AUC Score: 0.890 Average Precision: 0.893 Total-Time: 12
Step:  24/200 Loss: 6.14707 RO

# Citeseer

In [14]:
# Load the Citeseer graph; only `_A_obs` is used by the cells visible below.
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/citeseer.npz')
# Make the adjacency symmetric, then cap entries that became larger than 1.
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
# Remove self-loops: subtracting the identity turns a diagonal 1 into 0 and an
# empty diagonal slot into -1, which is then clamped back to 0.
_A_obs = _A_obs - sp.eye(_A_obs.shape[0], _A_obs.shape[0])
_A_obs[_A_obs < 0] = 0
# The clamp keeps the former -1 entries as explicitly stored zeros; drop them so
# the stored structure (nnz, indices) describes real edges only.
_A_obs.eliminate_zeros()
# Keep only the rows/columns selected by utils.largest_connected_components.
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [15]:
val_share = 0.05
test_share = 0.1
seed = 48

# NOTE: the raw Citeseer graph has self-loops (the loading cell strips the
# diagonal); this split is run with asserts=False.

# Split the observed graph into training pairs and validation/test 'ones' and
# 'zeros' (presumably held-out edges and sampled non-edges -- see utils).
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(
    _A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=False)

# Build the training adjacency from the (row, col) pairs in train_ones. The shape
# is passed explicitly: without it coo_matrix infers the size from the largest
# index present, which would shrink the matrix if the last nodes had no training pair.
train_graph = sp.coo_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=_A_obs.shape,
).tocsr()
# Symmetry is checked on the sparse matrices to avoid building two dense N x N arrays.
assert (train_graph != train_graph.T).nnz == 0

In [16]:
# Fresh, empty results table for this dataset; one column is added per method below.
df = pd.DataFrame()

#### CELL

In [17]:
# Train with g_type='cell' under run()'s default criterion ('eo', edge overlap).
method = 'cell'
options = {'g_type': 'cell'}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/200 Loss: 5.28629 Edge-Overlap: 0.044 Total-Time: 2
Step:  20/200 Loss: 2.91523 Edge-Overlap: 0.363 Total-Time: 4
Step:  30/200 Loss: 2.04880 Edge-Overlap: 0.611 Total-Time: 7
ROC-AUC: 0.8873980978260869, AVG_PREC: 0.9049761542549186


#### SVD

In [18]:
# Train with g_type='svd' and nsteps=400 (run()'s default is 200), edge-overlap criterion.
method = 'svd'
options = {'g_type': 'svd', 'nsteps': 400}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/400 Loss: 7.65241 Edge-Overlap: 0.002 Total-Time: 3
Step:  20/400 Loss: 7.26079 Edge-Overlap: 0.011 Total-Time: 6
Step:  30/400 Loss: 5.88715 Edge-Overlap: 0.072 Total-Time: 8
Step:  40/400 Loss: 4.45293 Edge-Overlap: 0.158 Total-Time: 11
Step:  50/400 Loss: 3.48089 Edge-Overlap: 0.315 Total-Time: 14
Step:  60/400 Loss: 2.89399 Edge-Overlap: 0.444 Total-Time: 16
Step:  70/400 Loss: 2.59868 Edge-Overlap: 0.520 Total-Time: 19
ROC-AUC: 0.9151110586011343, AVG_PREC: 0.9294366246611824


#### our CELL

In [19]:
# Train with g_type='fc' and a lower learning rate (0.05), edge-overlap criterion.
method = 'nonlin cell'
optimizer_args = {'lr': 0.05, 'weight_decay': 1e-5}
options = {'g_type': 'fc', 'optimizer_args': optimizer_args}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/200 Loss: 6.31022 Edge-Overlap: 0.024 Total-Time: 3
Step:  20/200 Loss: 4.06900 Edge-Overlap: 0.207 Total-Time: 6
Step:  30/200 Loss: 2.59340 Edge-Overlap: 0.485 Total-Time: 10
Step:  40/200 Loss: 2.13415 Edge-Overlap: 0.644 Total-Time: 13
ROC-AUC: 0.8846733223062383, AVG_PREC: 0.8967969524264645


#### Results

In [20]:
# Reference column: statistics of the observed graph `_A_obs`. The two trailing 1s
# fill the 'roc-auc' and 'avg-prec' rows that run() appends after the graph
# statistics (presumably standing for a perfect score -- confirm). The assignment
# is positional, so the statistic order must match the existing rows of df.
df['gt'] = [*compute_graph_statistics(_A_obs).values(), 1, 1]

In [21]:
# Display the per-method statistics next to the 'gt' column computed from `_A_obs`.
df

Unnamed: 0,cell,svd,nonlin cell,gt
d_max,55.0,49.8,70.8,99.0
d_min,1.0,1.0,1.0,1.0
d,2.954502,2.954502,2.954502,3.476777
LCC,2073.8,2099.2,1966.8,2110.0
wedge_count,13354.8,13328.8,18488.6,25943.0
claw_count,57748.6,52871.0,136247.8,250348.0
triangle_count,144.8,146.0,279.2,1083.0
square_count,439.0,415.4,1075.0,5977.0
power_law_exp,2.18704,2.195489,2.299983,2.068238
gini,0.376319,0.38123,0.433082,0.42826


In [22]:
# criterion='val' makes run() use LinkPredictionCriterion on the validation pairs.
method = 'cell'
options = {'g_type': 'cell', 'criterion': 'val', 'collect_stat': False}

# With collect_stat=False run() only prints the test scores and returns None,
# so there is no statistics column to add to df.
run(**options)

Step:   2/200 Loss: 7.63157 ROC-AUC Score: 0.601 Average Precision: 0.613 Total-Time: 0
Step:   4/200 Loss: 7.36956 ROC-AUC Score: 0.660 Average Precision: 0.685 Total-Time: 1
Step:   6/200 Loss: 6.84330 ROC-AUC Score: 0.692 Average Precision: 0.718 Total-Time: 2
Step:   8/200 Loss: 6.07910 ROC-AUC Score: 0.718 Average Precision: 0.745 Total-Time: 2
Step:  10/200 Loss: 5.23607 ROC-AUC Score: 0.757 Average Precision: 0.784 Total-Time: 3
Step:  12/200 Loss: 4.48084 ROC-AUC Score: 0.798 Average Precision: 0.825 Total-Time: 3
Step:  14/200 Loss: 3.91412 ROC-AUC Score: 0.830 Average Precision: 0.857 Total-Time: 4
Step:  16/200 Loss: 3.49612 ROC-AUC Score: 0.853 Average Precision: 0.877 Total-Time: 5
Step:  18/200 Loss: 3.14994 ROC-AUC Score: 0.867 Average Precision: 0.887 Total-Time: 6
Step:  20/200 Loss: 2.85988 ROC-AUC Score: 0.877 Average Precision: 0.896 Total-Time: 8
Step:  22/200 Loss: 2.62161 ROC-AUC Score: 0.885 Average Precision: 0.904 Total-Time: 8
Step:  24/200 Loss: 2.42357 ROC-

In [23]:
# g_type='svd' with criterion='val' (LinkPredictionCriterion) and nsteps=400.
method = 'svd'
options = {'g_type': 'svd', 'criterion': 'val', 'nsteps': 400, 'collect_stat': False}

# With collect_stat=False run() only prints the test scores and returns None,
# so there is no statistics column to add to df.
run(**options)

Step:   2/400 Loss: 7.65646 ROC-AUC Score: 0.518 Average Precision: 0.504 Total-Time: 0
Step:   4/400 Loss: 7.65481 ROC-AUC Score: 0.465 Average Precision: 0.475 Total-Time: 1
Step:   6/400 Loss: 7.65462 ROC-AUC Score: 0.527 Average Precision: 0.600 Total-Time: 2
Step:   8/400 Loss: 7.65357 ROC-AUC Score: 0.651 Average Precision: 0.729 Total-Time: 2
Step:  10/400 Loss: 7.64557 ROC-AUC Score: 0.726 Average Precision: 0.792 Total-Time: 4
Step:  12/400 Loss: 7.61561 ROC-AUC Score: 0.809 Average Precision: 0.856 Total-Time: 7
Step:  14/400 Loss: 7.54284 ROC-AUC Score: 0.875 Average Precision: 0.902 Total-Time: 8
Step:  16/400 Loss: 7.39171 ROC-AUC Score: 0.882 Average Precision: 0.908 Total-Time: 9
Step:  18/400 Loss: 7.14557 ROC-AUC Score: 0.896 Average Precision: 0.915 Total-Time: 9
Step:  20/400 Loss: 6.88452 ROC-AUC Score: 0.899 Average Precision: 0.916 Total-Time: 10
Step:  22/400 Loss: 6.58569 ROC-AUC Score: 0.912 Average Precision: 0.926 Total-Time: 11
Step:  24/400 Loss: 6.19785 RO

In [24]:
# g_type='fc' with criterion='val' (LinkPredictionCriterion) and explicit optimizer settings.
method = 'nonlin cell'
optimizer_args = {'lr': 0.1, 'weight_decay': 1e-5}
options = {
    'g_type': 'fc',
    'criterion': 'val',
    'optimizer_args': optimizer_args,
    'collect_stat': False,
}

# With collect_stat=False run() only prints the test scores and returns None,
# so there is no statistics column to add to df.
run(**options)

Step:   2/200 Loss: 7.67045 ROC-AUC Score: 0.780 Average Precision: 0.808 Total-Time: 0
Step:   4/200 Loss: 7.19226 ROC-AUC Score: 0.737 Average Precision: 0.770 Total-Time: 1
Step:   6/200 Loss: 6.90732 ROC-AUC Score: 0.771 Average Precision: 0.791 Total-Time: 1
Step:   8/200 Loss: 6.57721 ROC-AUC Score: 0.721 Average Precision: 0.761 Total-Time: 2
Step:  10/200 Loss: 6.36019 ROC-AUC Score: 0.737 Average Precision: 0.763 Total-Time: 2
Step:  12/200 Loss: 5.75468 ROC-AUC Score: 0.755 Average Precision: 0.774 Total-Time: 3
Step:  14/200 Loss: 5.29272 ROC-AUC Score: 0.761 Average Precision: 0.785 Total-Time: 4
ROC-AUC: 0.7592081167296786, AVG_PREC: 0.7483266535055098


# Polblogs

In [25]:
# Load the Polblogs graph; only `_A_obs` is used by the cells visible below.
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/polblogs.npz')
# Make the adjacency symmetric, then cap entries that became larger than 1.
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
# Remove self-loops: subtracting the identity turns a diagonal 1 into 0 and an
# empty diagonal slot into -1, which is then clamped back to 0.
_A_obs = _A_obs - sp.eye(_A_obs.shape[0], _A_obs.shape[0])
_A_obs[_A_obs < 0] = 0
# The clamp keeps the former -1 entries as explicitly stored zeros; drop them so
# the stored structure (nnz, indices) describes real edges only.
_A_obs.eliminate_zeros()
# Keep only the rows/columns selected by utils.largest_connected_components.
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [26]:
val_share = 0.05
test_share = 0.1
seed = 48

# Split the observed graph into training pairs and validation/test 'ones' and
# 'zeros' (presumably held-out edges and sampled non-edges -- see utils).
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(
    _A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=False)

# Build the training adjacency from the (row, col) pairs in train_ones. The shape
# is passed explicitly: without it coo_matrix infers the size from the largest
# index present, which would shrink the matrix if the last nodes had no training pair.
train_graph = sp.coo_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=_A_obs.shape,
).tocsr()
# Symmetry is checked on the sparse matrices to avoid building two dense N x N arrays.
assert (train_graph != train_graph.T).nnz == 0

In [27]:
# Fresh, empty results table for this dataset; one column is added per method below.
df = pd.DataFrame()

#### CELL

In [28]:
# Train with g_type='cell' under run()'s default criterion ('eo', edge overlap).
method = 'cell'
options = {'g_type': 'cell'}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/200 Loss: 5.84881 Edge-Overlap: 0.281 Total-Time: 1
Step:  20/200 Loss: 5.47476 Edge-Overlap: 0.299 Total-Time: 2
Step:  30/200 Loss: 5.30328 Edge-Overlap: 0.323 Total-Time: 3
Step:  40/200 Loss: 5.22759 Edge-Overlap: 0.343 Total-Time: 4
Step:  50/200 Loss: 5.18936 Edge-Overlap: 0.352 Total-Time: 5
Step:  60/200 Loss: 5.16904 Edge-Overlap: 0.353 Total-Time: 7
Step:  70/200 Loss: 5.15706 Edge-Overlap: 0.352 Total-Time: 9
Step:  80/200 Loss: 5.14958 Edge-Overlap: 0.355 Total-Time: 10
Step:  90/200 Loss: 5.14460 Edge-Overlap: 0.358 Total-Time: 11
Step: 100/200 Loss: 5.14109 Edge-Overlap: 0.368 Total-Time: 12
Step: 110/200 Loss: 5.13836 Edge-Overlap: 0.361 Total-Time: 14
Step: 120/200 Loss: 5.13609 Edge-Overlap: 0.360 Total-Time: 15
Step: 130/200 Loss: 5.13421 Edge-Overlap: 0.363 Total-Time: 16
Step: 140/200 Loss: 5.13263 Edge-Overlap: 0.357 Total-Time: 17
Step: 150/200 Loss: 5.13125 Edge-Overlap: 0.357 Total-Time: 18
Step: 160/200 Loss: 5.13002 Edge-Overlap: 0.363 Total-Time: 19

#### SVD

In [29]:
# Train with g_type='svd' and nsteps=400 (run()'s default is 200), edge-overlap criterion.
method = 'svd'
options = {'g_type': 'svd', 'nsteps': 400}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/400 Loss: 7.07858 Edge-Overlap: 0.022 Total-Time: 1
Step:  20/400 Loss: 6.39902 Edge-Overlap: 0.161 Total-Time: 2
Step:  30/400 Loss: 5.91235 Edge-Overlap: 0.220 Total-Time: 4
Step:  40/400 Loss: 5.70091 Edge-Overlap: 0.259 Total-Time: 5
Step:  50/400 Loss: 5.58715 Edge-Overlap: 0.276 Total-Time: 6
Step:  60/400 Loss: 5.53881 Edge-Overlap: 0.286 Total-Time: 7
Step:  70/400 Loss: 5.51130 Edge-Overlap: 0.290 Total-Time: 8
Step:  80/400 Loss: 5.49320 Edge-Overlap: 0.292 Total-Time: 10
Step:  90/400 Loss: 5.48540 Edge-Overlap: 0.288 Total-Time: 11
Step: 100/400 Loss: 5.47701 Edge-Overlap: 0.289 Total-Time: 12
Step: 110/400 Loss: 5.46259 Edge-Overlap: 0.295 Total-Time: 13
Step: 120/400 Loss: 5.46716 Edge-Overlap: 0.290 Total-Time: 14
Step: 130/400 Loss: 5.45630 Edge-Overlap: 0.297 Total-Time: 16
Step: 140/400 Loss: 5.46460 Edge-Overlap: 0.295 Total-Time: 17
Step: 150/400 Loss: 5.44760 Edge-Overlap: 0.297 Total-Time: 18
Step: 160/400 Loss: 5.48187 Edge-Overlap: 0.306 Total-Time: 19

#### our CELL

In [30]:
# Train with g_type='fc'; the optimizer settings equal run()'s defaults but are spelled out.
method = 'nonlin cell'
optimizer_args = dict(lr=0.1, weight_decay=1e-5)
options = {'g_type': 'fc', 'optimizer_args': optimizer_args}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/200 Loss: 6.32950 Edge-Overlap: 0.152 Total-Time: 1
Step:  20/200 Loss: 5.85202 Edge-Overlap: 0.231 Total-Time: 8
Step:  30/200 Loss: 5.73939 Edge-Overlap: 0.226 Total-Time: 11
Step:  40/200 Loss: 5.71152 Edge-Overlap: 0.241 Total-Time: 13
Step:  50/200 Loss: 5.58731 Edge-Overlap: 0.267 Total-Time: 14
Step:  60/200 Loss: 5.49715 Edge-Overlap: 0.281 Total-Time: 15
Step:  70/200 Loss: 5.44083 Edge-Overlap: 0.298 Total-Time: 17
Step:  80/200 Loss: 5.39373 Edge-Overlap: 0.289 Total-Time: 18
Step:  90/200 Loss: 5.33906 Edge-Overlap: 0.312 Total-Time: 20
Step: 100/200 Loss: 5.31527 Edge-Overlap: 0.334 Total-Time: 21
Step: 110/200 Loss: 5.29251 Edge-Overlap: 0.320 Total-Time: 23
Step: 120/200 Loss: 5.28192 Edge-Overlap: 0.329 Total-Time: 24
Step: 130/200 Loss: 5.24916 Edge-Overlap: 0.330 Total-Time: 26
Step: 140/200 Loss: 5.24316 Edge-Overlap: 0.343 Total-Time: 27
Step: 150/200 Loss: 5.22756 Edge-Overlap: 0.339 Total-Time: 31
Step: 160/200 Loss: 5.22564 Edge-Overlap: 0.337 Total-Tim

In [31]:
# Reference column: statistics of the observed graph `_A_obs`. The two trailing 1s
# fill the 'roc-auc' and 'avg-prec' rows that run() appends after the graph
# statistics (presumably standing for a perfect score -- confirm). The assignment
# is positional, so the statistic order must match the existing rows of df.
df['gt'] = [*compute_graph_statistics(_A_obs).values(), 1, 1]

In [32]:
# Display the per-method statistics next to the 'gt' column computed from `_A_obs`.
df

Unnamed: 0,cell,svd,nonlin cell,gt
d_max,263.0,261.4,266.4,351.0
d_min,1.0,1.0,1.0,1.0
d,23.25041,23.25041,23.25041,27.35516
LCC,1222.0,1221.6,1221.6,1222.0
wedge_count,851763.0,841606.6,914952.2,1341525.0
claw_count,29174630.0,27672970.0,33647310.0,62800780.0
triangle_count,40730.8,42347.0,45127.6,101043.0
square_count,1575500.0,1620467.0,1854367.0,5171257.0
power_law_exp,1.40683,1.410251,1.428202,1.414274
gini,0.5772717,0.5810389,0.6054291,0.6220195


# RT-GOP

In [33]:
# Load the RT-GOP graph; only `_A_obs` is used by the cells visible below.
_A_obs, _X_obs, _z_obs = utils.load_npz('../data/rt_gop.npz')
# Make the adjacency symmetric, then cap entries that became larger than 1.
_A_obs = _A_obs + _A_obs.T
_A_obs[_A_obs > 1] = 1
# Remove self-loops: subtracting the identity turns a diagonal 1 into 0 and an
# empty diagonal slot into -1, which is then clamped back to 0.
_A_obs = _A_obs - sp.eye(_A_obs.shape[0], _A_obs.shape[0])
_A_obs[_A_obs < 0] = 0
# The clamp keeps the former -1 entries as explicitly stored zeros; drop them so
# the stored structure (nnz, indices) describes real edges only.
_A_obs.eliminate_zeros()
# Keep only the rows/columns selected by utils.largest_connected_components.
lcc = utils.largest_connected_components(_A_obs)
_A_obs = _A_obs[lcc,:][:,lcc]
_N = _A_obs.shape[0]

Selecting 1 largest connected components


In [34]:
val_share = 0.05
test_share = 0.1
seed = 48

# Split the observed graph into training pairs and validation/test 'ones' and
# 'zeros' (presumably held-out edges and sampled non-edges -- see utils).
train_ones, val_ones, val_zeros, test_ones, test_zeros = utils.train_val_test_split_adjacency(
    _A_obs, val_share, test_share, seed, undirected=True, connected=True, asserts=False)

# Build the training adjacency from the (row, col) pairs in train_ones. The shape
# is passed explicitly: without it coo_matrix infers the size from the largest
# index present, which would shrink the matrix if the last nodes had no training pair.
train_graph = sp.coo_matrix(
    (np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1])),
    shape=_A_obs.shape,
).tocsr()
# Symmetry is checked on the sparse matrices to avoid building two dense N x N arrays.
assert (train_graph != train_graph.T).nnz == 0

In [35]:
# Fresh, empty results table for this dataset; one column is added per method below.
df = pd.DataFrame()

In [36]:
# Train with g_type='cell' under run()'s default criterion ('eo', edge overlap).
method = 'cell'
options = {'g_type': 'cell'}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/200 Loss: 5.72706 Edge-Overlap: 0.019 Total-Time: 9
Step:  20/200 Loss: 2.51444 Edge-Overlap: 0.545 Total-Time: 19
ROC-AUC: 0.6790294412803504, AVG_PREC: 0.682087804284342


In [37]:
# Train with g_type='svd' and nsteps=400 (run()'s default is 200), edge-overlap criterion.
method = 'svd'
options = {'g_type': 'svd', 'nsteps': 400}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/400 Loss: 8.45468 Edge-Overlap: 0.001 Total-Time: 10
Step:  20/400 Loss: 8.16276 Edge-Overlap: 0.001 Total-Time: 22
Step:  30/400 Loss: 7.08700 Edge-Overlap: 0.052 Total-Time: 35
Step:  40/400 Loss: 6.06453 Edge-Overlap: 0.110 Total-Time: 45
Step:  50/400 Loss: 5.45302 Edge-Overlap: 0.124 Total-Time: 57
Step:  60/400 Loss: 5.03167 Edge-Overlap: 0.146 Total-Time: 65
Step:  70/400 Loss: 4.70072 Edge-Overlap: 0.180 Total-Time: 78
Step:  80/400 Loss: 4.54685 Edge-Overlap: 0.205 Total-Time: 88
Step:  90/400 Loss: 4.42346 Edge-Overlap: 0.214 Total-Time: 97
Step: 100/400 Loss: 4.42192 Edge-Overlap: 0.230 Total-Time: 107
Step: 110/400 Loss: 4.31891 Edge-Overlap: 0.234 Total-Time: 119
Step: 120/400 Loss: 4.25269 Edge-Overlap: 0.251 Total-Time: 129
Step: 130/400 Loss: 4.34690 Edge-Overlap: 0.249 Total-Time: 140
Step: 140/400 Loss: 4.29001 Edge-Overlap: 0.256 Total-Time: 150
Step: 150/400 Loss: 4.29676 Edge-Overlap: 0.286 Total-Time: 162
Step: 160/400 Loss: 4.28866 Edge-Overlap: 0.272 T

In [38]:
# Train with g_type='fc'; the optimizer settings equal run()'s defaults but are spelled out.
method = 'nonlin cell'
optimizer_args = dict(lr=0.1, weight_decay=1e-5)
options = {'g_type': 'fc', 'optimizer_args': optimizer_args}

# run() returns (averaged statistics, sampled graphs); only the statistics are stored.
stat_df, _ = run(**options)
df[method] = stat_df

Step:  10/200 Loss: 7.29063 Edge-Overlap: 0.014 Total-Time: 12
Step:  20/200 Loss: 4.53364 Edge-Overlap: 0.210 Total-Time: 23
Step:  30/200 Loss: 2.81857 Edge-Overlap: 0.589 Total-Time: 37
ROC-AUC: 0.8339578255939736, AVG_PREC: 0.8281045442188132


In [39]:
# Reference column: statistics of the observed graph `_A_obs`. The two trailing 1s
# fill the 'roc-auc' and 'avg-prec' rows that run() appends after the graph
# statistics (presumably standing for a perfect score -- confirm). The assignment
# is positional, so the statistic order must match the existing rows of df.
df['gt'] = [*compute_graph_statistics(_A_obs).values(), 1, 1]

In [40]:
# Display the per-method statistics next to the 'gt' column computed from `_A_obs`.
df

Unnamed: 0,cell,svd,nonlin cell,gt
d_max,271.0,209.8,255.2,308.0
d_min,1.0,1.0,1.0,1.0
d,2.005121,2.005121,2.005121,2.359292
LCC,4109.0,4426.4,3989.0,4687.0
wedge_count,86353.6,62557.2,77205.6,122755.0
claw_count,4402984.0,2163120.0,3610721.0,6526042.0
triangle_count,0.2,0.2,0.0,33.0
square_count,10.2,4.2,11.6,369.0
power_law_exp,3.937709,4.081575,4.201475,3.492755
gini,0.4323066,0.4429883,0.4479705,0.5020176
