# Comparison between GraphRNN and GRAN

## Setup

In [1]:
import os
import sys
import torch
import logging
import traceback
import numpy as np
from pprint import pprint
import pandas as pd
from runner.train_runners import *
from utils.logger import setup_logging
from utils.arg_helper import parse_arguments, get_config
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Print tensors in full instead of abbreviating large ones.
torch.set_printoptions(profile='full')


## Useful Functions

In [2]:
# Load the table of saved training runs; its `file_dir` column is iterated
# further down to locate each run's config.yaml.
df = pd.read_csv("save_model_learning.csv")
# NOTE(review): the "Unnamed: 0" / "Unnamed: 0.1" columns shown in the output look
# like indices written back by earlier saves of this CSV — confirm before relying on them.
df

Unnamed: 0.1,Unnamed: 0,Date,dataset_name,dataset_num,model_name,num_epochs,file_dir
0,0,2021-Aug-15-01-56-10,community2,500,GRANMixtureBernoulli,5,exp/GRAN/community2\GRANMixtureBernoulli_commu...
1,0,2021-Aug-15-02-09-00,community4,500,GRANMixtureBernoulli,5,exp/GRAN/community4\GRANMixtureBernoulli_commu...
2,0,2021-Aug-15-02-29-46,community8,500,GRANMixtureBernoulli,5,exp/GRAN/community8\GRANMixtureBernoulli_commu...
3,0,2021-Aug-16-14-28-27,watts,500,GRANMixtureBernoulli,5,exp/GRAN/watts\GRANMixtureBernoulli_watts_2021...
4,0,2021-Aug-17-16-56-29,barabasi,500,GRANMixtureBernoulli,15,exp/GRAN/barabasi\GRANMixtureBernoulli_barabas...
5,0,2021-Aug-17-22-00-39,barabasi,500,GRANMixtureBernoulli,50,exp/GRAN/barabasi\GRANMixtureBernoulli_barabas...
6,0,2021-Aug-18-13-17-52,community2,500,RNN,1000,exp/GraphRNN/rnn/community2\RNN_community2_202...
7,0,2021-Aug-18-16-05-01,community4,500,RNN,1000,exp/GraphRNN/rnn/community4\RNN_community4_202...
8,0,2021-Aug-18-17-43-34,community4,500,RNN,1000,exp/GraphRNN/rnn/community4\RNN_community4_202...
9,0,2021-Aug-18-22-08-24,community8,500,RNN,1000,exp/GraphRNN/rnn/community8\RNN_community8_202...


## Research Questions
1) Which of the two models performs better on each dataset?

2) For GRAN, which node ordering gives the best results?

3) Is GRAN biased towards generating communities? (How many? Scalability? Robustness?)

4) What are the optimal M parameters for GraphRNN on each dataset?
Does tweaking this parameter change the efficiency significantly?

5) Are the SotA autoregressive models able to retain the small-world property of the graphs (i.e. the average path length between two nodes is proportional to the logarithm of the number of nodes $N$)?
(e.g. create a GNN "small-world" classifier, or a GAN)

## Experiments
#### 1) Which of the two models performs better on each dataset?
#### 2) For GRAN, which node ordering gives the best results?
#### 3) Is GRAN biased towards generating communities? (How many? Scalability? Robustness?)
#### 4) What are the optimal M parameters for GraphRNN on each dataset? Does tweaking this parameter change the efficiency significantly?

## Datasets

### Erdos Renyi dataset
Parameters used: 500 graphs with between 100 and 200 nodes, p=0.1

Node ordering (GRAN) :

### Barabasi Albert Dataset
Parameters used: 500 graphs with between 100 and 200 nodes, k=4/5

Node ordering (GRAN) :

### Watts Strogatz Dataset
Parameters used: 500 graphs with between 100 and 200 nodes, p=0.01

Node ordering (GRAN) : DFS


### Community Dataset
Parameters used: graphs with 2/4/8 communities, each community having between 12 and 17 nodes

Node ordering (GRAN) : DFS


## Results

In [3]:
def get_stats_from_trained_model(config):
    """Run the test phase of a trained model and collect its MMD statistics.

    NumPy and PyTorch (CPU and all CUDA devices) are seeded from
    ``config.seed``; ``config.use_gpu`` is updated in place so that it is only
    truthy when CUDA is actually available. The runner named by
    ``config.runner`` is then instantiated with ``config`` and its ``test()``
    method is called.

    Args:
        config: configuration object exposing ``seed``, ``use_gpu`` and
            ``runner``. It is mutated (``use_gpu``) and handed to the runner.

    Returns:
        dict: the eight values returned by ``runner.test()``, keyed in order by
        ``mmd_degree_dev``, ``mmd_clustering_dev``, ``mmd_4orbits_dev``,
        ``mmd_spectral_dev``, ``mmd_degree_test``, ``mmd_clustering_test``,
        ``mmd_4orbits_test`` and ``mmd_spectral_test``.

    Raises:
        ValueError: if ``runner.test()`` does not yield exactly eight values.
    """
    # Seed every random number generator from the same configured seed.
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    np.random.seed(config.seed)

    # A requested GPU is only kept when CUDA is really there.
    if config.use_gpu:
        config.use_gpu = torch.cuda.is_available()
    torch.cuda.empty_cache()

    # NOTE(review): eval() executes whatever string the config holds; only
    # load config files from trusted sources.
    runner = eval(config.runner)(config)

    # Tuple unpacking enforces that exactly eight statistics come back.
    (degree_dev, clustering_dev, orbits_dev, spectral_dev,
     degree_test, clustering_test, orbits_test, spectral_test) = runner.test()

    dev_stats = {
        "mmd_degree_dev": degree_dev,
        "mmd_clustering_dev": clustering_dev,
        "mmd_4orbits_dev": orbits_dev,
        "mmd_spectral_dev": spectral_dev,
    }
    test_stats = {
        "mmd_degree_test": degree_test,
        "mmd_clustering_test": clustering_test,
        "mmd_4orbits_test": orbits_test,
        "mmd_spectral_test": spectral_test,
    }
    return {**dev_stats, **test_stats}


# Evaluate every training run listed in `df` and gather one result row per run.
row_list = []
for training_path in df['file_dir']:

    # Loading stays best-effort: a run whose config cannot be read is skipped,
    # but the reason is logged instead of being silently dropped. Catching
    # Exception (rather than a bare except) lets KeyboardInterrupt stop this
    # long-running loop.
    try:
        config_path = os.path.join(training_path, 'config.yaml')
        config = get_config(config_path)
    except Exception as exc:
        logging.warning("Skipping %r: could not load its config.yaml (%s: %s)",
                        training_path, type(exc).__name__, exc)
        continue

    # Runs stored under a path containing 'mlp' get an "_MLP" suffix on the
    # model name so they appear as separate rows in the result table.
    model_name = config.model.name
    if 'mlp' in training_path:
        model_name = model_name + "_MLP"

    dict_results = {"dataset_name": config.dataset.name, "model_name": model_name,
                    "num_epochs": config.train.max_epoch}

    dict_stats = get_stats_from_trained_model(config)
    dict_results.update(dict_stats)
    row_list.append(dict_results)
    # Release cached GPU memory between runs.
    torch.cuda.empty_cache()

# Build the frame once from the collected records.
result_df = pd.DataFrame(row_list)
torch.cuda.empty_cache()

max # nodes = 32 || mean # nodes = 32.0
max # edges = 170 || mean # edges = 162.574


100%|██████████| 20/20 [01:01<00:00,  3.08s/it]


max # nodes = 64 || mean # nodes = 64.0
max # edges = 347 || mean # edges = 335.038


100%|██████████| 20/20 [03:22<00:00, 10.14s/it]


max # nodes = 115 || mean # nodes = 115.0
max # edges = 614 || mean # edges = 593.484


100%|██████████| 20/20 [12:10<00:00, 36.54s/it]


max # nodes = 199 || mean # nodes = 149.5
max # edges = 199 || mean # edges = 149.5


100%|██████████| 40/40 [11:01<00:00, 16.54s/it]


max # nodes = 199 || mean # nodes = 149.5
max # edges = 780 || mean # edges = 582.0


100%|██████████| 40/40 [20:49<00:00, 31.23s/it]


max # nodes = 199 || mean # nodes = 149.5
max # edges = 780 || mean # edges = 582.0


100%|██████████| 40/40 [20:45<00:00, 31.13s/it]


max # nodes = 32 || mean # nodes = 32.0
max # edges = 170 || mean # edges = 162.574




max # nodes = 64 || mean # nodes = 64.0
max # edges = 347 || mean # edges = 335.038
max # nodes = 115 || mean # nodes = 115.0
max # edges = 614 || mean # edges = 593.484
max # nodes = 199 || mean # nodes = 149.5
max # edges = 199 || mean # edges = 149.5
max # nodes = 199 || mean # nodes = 149.5
max # edges = 780 || mean # edges = 582.0
max # nodes = 199 || mean # nodes = 149.5
max # edges = 780 || mean # edges = 582.0
max # nodes = 199 || mean # nodes = 149.5
max # edges = 780 || mean # edges = 582.0
max # nodes = 199 || mean # nodes = 149.5
max # edges = 199 || mean # edges = 149.5
max # nodes = 32 || mean # nodes = 32.0
max # edges = 170 || mean # edges = 162.574
max # nodes = 64 || mean # nodes = 64.0
max # edges = 347 || mean # edges = 335.038
max # nodes = 115 || mean # nodes = 115.0
max # edges = 614 || mean # edges = 593.484
max # nodes = 199 || mean # nodes = 149.5
max # edges = 199 || mean # edges = 149.5
max # nodes = 199 || mean # nodes = 149.5
max # edges = 199 || mean # ed

100%|██████████| 1/1 [00:15<00:00, 15.27s/it]


In [4]:
# Show the collected results: one row per evaluated training run.
result_df


Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
0,community2,GRANMixtureBernoulli,5,0.015923,0.202623,0.1396051,0.071799,0.017348,0.276299,0.1712755,0.073148
1,community4,GRANMixtureBernoulli,5,0.009925,0.036726,0.4744825,0.02033,0.008988,0.033917,0.4378288,0.019204
2,community8,GRANMixtureBernoulli,5,0.019024,0.021497,0.5293842,0.004585,0.019817,0.020867,0.4909517,0.005272
3,watts,GRANMixtureBernoulli,5,0.120478,0.027812,0.001762392,0.173904,0.122088,0.031381,0.001768402,0.203878
4,barabasi,GRANMixtureBernoulli,15,0.053416,0.070331,0.09673317,0.010382,0.050535,0.207837,0.08002445,0.009157
5,barabasi,GRANMixtureBernoulli,50,0.029014,0.039132,0.1025972,0.013394,0.030451,0.205029,0.07112791,0.012538
6,community2,RNN,1000,0.042362,0.234516,0.3248894,0.107387,0.043783,0.308299,0.326366,0.110081
7,community4,RNN,1000,0.018449,0.067304,0.608483,0.028228,0.018187,0.064511,0.5924443,0.028171
8,community8,RNN,1000,0.062338,0.057058,0.6888835,0.05954,0.059766,0.055865,0.6393324,0.059826
9,watts,RNN,1000,-1e-06,2.8e-05,-3.300222e-08,0.011818,6e-06,4e-05,1.31932e-07,0.030097


In [5]:
# Same table, rendered through the pandas Styler.
result_df.style

Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
0,community2,GRANMixtureBernoulli,5,0.015923,0.202623,0.139605,0.071799,0.017348,0.276299,0.171276,0.073148
1,community4,GRANMixtureBernoulli,5,0.009925,0.036726,0.474483,0.02033,0.008988,0.033917,0.437829,0.019204
2,community8,GRANMixtureBernoulli,5,0.019024,0.021497,0.529384,0.004585,0.019817,0.020867,0.490952,0.005272
3,watts,GRANMixtureBernoulli,5,0.120478,0.027812,0.001762,0.173904,0.122088,0.031381,0.001768,0.203878
4,barabasi,GRANMixtureBernoulli,15,0.053416,0.070331,0.096733,0.010382,0.050535,0.207837,0.080024,0.009157
5,barabasi,GRANMixtureBernoulli,50,0.029014,0.039132,0.102597,0.013394,0.030451,0.205029,0.071128,0.012538
6,community2,RNN,1000,0.042362,0.234516,0.324889,0.107387,0.043783,0.308299,0.326366,0.110081
7,community4,RNN,1000,0.018449,0.067304,0.608483,0.028228,0.018187,0.064511,0.592444,0.028171
8,community8,RNN,1000,0.062338,0.057058,0.688883,0.05954,0.059766,0.055865,0.639332,0.059826
9,watts,RNN,1000,-1e-06,2.8e-05,-0.0,0.011818,6e-06,4e-05,0.0,0.030097


Here are the MMD (with EMD) metric results for each dataset.
#### 2-Community Dataset stats

In [6]:
# Results for the 2-community dataset. The lowest value of each MMD column is
# highlighted; `subset` keeps the label columns (dataset_name, model_name,
# num_epochs) out of the highlighting, where a "minimum" is meaningless.
community2_results = result_df[result_df['dataset_name'] == "community2"]
community2_results.style.highlight_min(
    color='lightblue',
    axis=0,
    subset=[col for col in community2_results.columns if col.startswith("mmd_")],
)

Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
0,community2,GRANMixtureBernoulli,5,0.015923,0.202623,0.139605,0.071799,0.017348,0.276299,0.171276,0.073148
6,community2,RNN,1000,0.042362,0.234516,0.324889,0.107387,0.043783,0.308299,0.326366,0.110081
14,community2,RNN_MLP,1000,0.042169,0.234598,0.527127,0.125244,0.043783,0.308377,0.499491,0.130945


#### 4-Community Dataset stats

In [7]:
# Results for the 4-community dataset. The lowest value of each MMD column is
# highlighted; `subset` keeps the label columns (dataset_name, model_name,
# num_epochs) out of the highlighting, where a "minimum" is meaningless.
community4_results = result_df[result_df['dataset_name'] == "community4"]
community4_results.style.highlight_min(
    color='lightblue',
    axis=0,
    subset=[col for col in community4_results.columns if col.startswith("mmd_")],
)

Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
1,community4,GRANMixtureBernoulli,5,0.009925,0.036726,0.474483,0.02033,0.008988,0.033917,0.437829,0.019204
7,community4,RNN,1000,0.018449,0.067304,0.608483,0.028228,0.018187,0.064511,0.592444,0.028171
15,community4,RNN_MLP,1000,0.050438,0.067263,0.576653,0.041477,0.050584,0.064461,0.56037,0.039798


#### 8-Community Dataset stats

In [8]:
# Results for the 8-community dataset. The lowest value of each MMD column is
# highlighted; `subset` keeps the label columns (dataset_name, model_name,
# num_epochs) out of the highlighting, where a "minimum" is meaningless.
community8_results = result_df[result_df['dataset_name'] == "community8"]
community8_results.style.highlight_min(
    color='lightblue',
    axis=0,
    subset=[col for col in community8_results.columns if col.startswith("mmd_")],
)

Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
2,community8,GRANMixtureBernoulli,5,0.019024,0.021497,0.529384,0.004585,0.019817,0.020867,0.490952,0.005272
8,community8,RNN,1000,0.062338,0.057058,0.688883,0.05954,0.059766,0.055865,0.639332,0.059826
16,community8,RNN_MLP,1000,0.104196,0.059649,0.704969,0.067516,0.1006,0.058457,0.655418,0.067214


#### Barabasi Dataset stats

In [9]:
# Results for the Barabasi dataset. The lowest value of each MMD column is
# highlighted; `subset` keeps the label columns (dataset_name, model_name,
# num_epochs) out of the highlighting, where a "minimum" is meaningless.
barabasi_results = result_df[result_df['dataset_name'] == "barabasi"]
barabasi_results.style.highlight_min(
    color='lightblue',
    axis=0,
    subset=[col for col in barabasi_results.columns if col.startswith("mmd_")],
)

Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
4,barabasi,GRANMixtureBernoulli,15,0.053416,0.070331,0.096733,0.010382,0.050535,0.207837,0.080024,0.009157
5,barabasi,GRANMixtureBernoulli,50,0.029014,0.039132,0.102597,0.013394,0.030451,0.205029,0.071128,0.012538
10,barabasi,RNN,1000,0.152335,0.383032,0.282156,0.050917,0.149083,0.514725,0.243675,0.019441
11,barabasi,RNN_MLP,1000,0.15602,0.30447,0.279178,0.046583,0.159737,0.444422,0.252652,0.019797
12,barabasi,RNN_MLP,1000,0.139028,0.310895,0.264158,0.053124,0.14022,0.469585,0.233821,0.020829


#### Watts-Strogatz (p=0.05 Graph mode) Dataset stats

In [10]:
# Results for the "watts" dataset. The lowest value of each MMD column is
# highlighted; `subset` keeps the label columns (dataset_name, model_name,
# num_epochs) out of the highlighting, where a "minimum" is meaningless.
watts_results = result_df[result_df['dataset_name'] == "watts"]
watts_results.style.highlight_min(
    color='lightblue',
    axis=0,
    subset=[col for col in watts_results.columns if col.startswith("mmd_")],
)


Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
3,watts,GRANMixtureBernoulli,5,0.120478,0.027812,0.001762,0.173904,0.122088,0.031381,0.001768,0.203878
9,watts,RNN,1000,-1e-06,2.8e-05,-0.0,0.011818,6e-06,4e-05,0.0,0.030097
13,watts,RNN_MLP,1000,0.000116,8e-05,2e-06,0.013796,0.000328,3e-06,4e-06,0.023979


#### Watts-Strogatz (Ring p =0.0 Graph mode) Dataset stats

In [11]:
# Results for the "watts_ring" dataset. The lowest value of each MMD column is
# highlighted; `subset` keeps the label columns (dataset_name, model_name,
# num_epochs) out of the highlighting, where a "minimum" is meaningless.
watts_ring_results = result_df[result_df['dataset_name'] == "watts_ring"]
watts_ring_results.style.highlight_min(
    color='lightblue',
    axis=0,
    subset=[col for col in watts_ring_results.columns if col.startswith("mmd_")],
)

Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
17,watts_ring,RNN_MLP,1000,0.006051,0.0,7.2e-05,0.099828,0.006051,0.0,7.2e-05,0.037247
18,watts_ring,RNN,1000,0.016619,0.000541,3.2e-05,0.119235,0.016619,0.000541,3.2e-05,0.061499
19,watts_ring,GRANMixtureBernoulli,50,0.00204,0.0,2.3e-05,0.09064,0.00204,0.0,2.3e-05,0.033953
