## Comparison between GraphRNN and GRAN

## Setup

In [None]:
import os
import sys
import torch
import logging
import traceback
import numpy as np
from pprint import pprint
import pandas as pd
from runner.train_runners import *
from utils.logger import setup_logging
from utils.arg_helper import parse_arguments, get_config
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Print tensors in full instead of the default summarized (elided) form.
torch.set_printoptions(profile='full')


## Useful Functions

In [7]:
# Index of previously trained models: one row per saved run; the `file_dir`
# column is what the evaluation loop further down iterates over.
df = pd.read_csv(filepath_or_buffer="save_model_learning.csv")
df

Unnamed: 0.1,Unnamed: 0,Date,dataset_name,dataset_num,model_name,num_epochs,file_dir
0,0,2021-Aug-15-01-56-10,community2,500,GRANMixtureBernoulli,5,exp/GRAN/community2\GRANMixtureBernoulli_commu...
1,0,2021-Aug-15-02-09-00,community4,500,GRANMixtureBernoulli,5,exp/GRAN/community4\GRANMixtureBernoulli_commu...
2,0,2021-Aug-15-02-29-46,community8,500,GRANMixtureBernoulli,5,exp/GRAN/community8\GRANMixtureBernoulli_commu...
3,0,2021-Aug-16-14-28-27,watts,500,GRANMixtureBernoulli,5,exp/GRAN/watts\GRANMixtureBernoulli_watts_2021...
4,0,2021-Aug-17-16-56-29,barabasi,500,GRANMixtureBernoulli,15,exp/GRAN/barabasi\GRANMixtureBernoulli_barabas...
5,0,2021-Aug-17-22-00-39,barabasi,500,GRANMixtureBernoulli,50,exp/GRAN/barabasi\GRANMixtureBernoulli_barabas...
6,0,2021-Aug-18-13-17-52,community2,500,RNN,1000,exp/GraphRNN/rnn/community2\RNN_community2_202...
7,0,2021-Aug-18-16-05-01,community4,500,RNN,1000,exp/GraphRNN/rnn/community4\RNN_community4_202...
8,0,2021-Aug-18-17-43-34,community4,500,RNN,1000,exp/GraphRNN/rnn/community4\RNN_community4_202...
9,0,2021-Aug-18-22-08-24,community8,500,RNN,1000,exp/GraphRNN/rnn/community8\RNN_community8_202...


## Research Questions
1) Which of the two models performs better on each dataset?

2) For GRAN, which node ordering gives the best results?

3) Is GRAN biased towards generating communities? (How many? Scalability? Robustness?)

4) What are the optimal M-parameters for GraphRNN on each dataset?
Does tuning these parameters change the efficiency significantly?

5) Are the SotA autoregressive models able to retain the small-world property of the graphs (i.e. the average path length $\ell$ between two nodes is proportional to the logarithm of the number of nodes $N$: $\ell \propto \log N$)?
(Idea: create a GNN "small-world" classifier, or a GAN.)

## Experiments
####  1) Which of the two models performs better on each dataset?
####  2) For GRAN, which node ordering gives the best results?
####  3) Is GRAN biased towards generating communities? (How many? Scalability? Robustness?)
####  4) What are the optimal M-parameters for GraphRNN on each dataset? Does tuning these parameters change the efficiency significantly?

## Datasets

### Erdos Renyi dataset
Parameters used: 500 graphs with between 100 and 200 nodes, p=0.1

Node ordering (GRAN) :

### Barabasi Albert Dataset
Parameters used: 500 graphs with between 100 and 200 nodes, k=4/5

Node ordering (GRAN) :

### Watts Strogatz Dataset
Parameters used: 500 graphs with between 100 and 200 nodes, p=0.01

Node ordering (GRAN) : DFS


### Community Dataset
Parameters used: graphs with 2/4/8 communities of between 12 and 17 nodes each

Node ordering (GRAN) : DFS


## Results

In [8]:
def get_stats_from_trained_model(config):
    """Evaluate a trained model and return its MMD statistics as a dict.

    Seeds numpy and torch from ``config.seed``, downgrades ``config.use_gpu``
    to a falsy value when CUDA is unavailable, builds the runner named by
    ``config.runner`` and calls its ``test()`` method.

    Args:
        config: configuration object exposing ``seed``, ``use_gpu`` and
            ``runner``; it is mutated in place (``use_gpu``) and passed to
            the runner's constructor.

    Returns:
        dict mapping ``mmd_{degree,clustering,4orbits,spectral}_{dev,test}``
        to the eight values returned by ``runner.test()``, dev metrics first.

    Raises:
        ValueError: if ``runner.test()`` does not yield exactly eight values.
    """
    seed = config.seed
    for seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    config.use_gpu = config.use_gpu and torch.cuda.is_available()
    torch.cuda.empty_cache()

    # NOTE(review): eval() executes whatever string the config file holds;
    # only use this with trusted configs.
    runner_factory = eval(config.runner)
    runner = runner_factory(config)

    # runner.test() is expected to return the four dev metrics followed by
    # the four test metrics; the unpacking enforces that arity.
    (degree_dev, clustering_dev, orbits_dev, spectral_dev,
     degree_test, clustering_test, orbits_test, spectral_test) = runner.test()

    return dict(
        mmd_degree_dev=degree_dev,
        mmd_clustering_dev=clustering_dev,
        mmd_4orbits_dev=orbits_dev,
        mmd_spectral_dev=spectral_dev,
        mmd_degree_test=degree_test,
        mmd_clustering_test=clustering_test,
        mmd_4orbits_test=orbits_test,
        mmd_spectral_test=spectral_test,
    )


# Evaluate every saved training run listed in `df` and collect one row of
# MMD statistics per run into `result_df`.
row_list = []
for training_path in df['file_dir']:

    # Loading is best effort: a run whose config cannot be read is skipped.
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still stop the loop, and log the skip so
    # missing rows in `result_df` can be traced back to their cause.
    try:
        config_path = os.path.join(training_path, 'config.yaml')
        config = get_config(config_path)
    except Exception as err:
        logging.warning("Skipping run %r: could not load its config (%s: %s)",
                        training_path, type(err).__name__, err)
        continue

    # Identify the run, then append the MMD statistics of its trained model.
    dict_results = {"dataset_name": config.dataset.name, "model_name": config.model.name,
                    "num_epochs": config.train.max_epoch}
    dict_stats = get_stats_from_trained_model(config)
    dict_results.update(dict_stats)
    row_list.append(dict_results)
    # Release cached GPU memory between runs.
    torch.cuda.empty_cache()

# Build the frame once from the list of records.
result_df = pd.DataFrame(row_list)
torch.cuda.empty_cache()

max # nodes = 32 || mean # nodes = 32.0
max # edges = 170 || mean # edges = 162.574


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]


max # nodes = 64 || mean # nodes = 64.0
max # edges = 347 || mean # edges = 335.038


100%|██████████| 1/1 [00:04<00:00,  4.34s/it]


max # nodes = 115 || mean # nodes = 115.0
max # edges = 614 || mean # edges = 593.484


100%|██████████| 1/1 [00:15<00:00, 15.73s/it]


max # nodes = 199 || mean # nodes = 149.5
max # edges = 199 || mean # edges = 149.5


100%|██████████| 1/1 [00:14<00:00, 14.31s/it]


max # nodes = 199 || mean # nodes = 149.5
max # edges = 780 || mean # edges = 582.0


100%|██████████| 1/1 [00:25<00:00, 25.77s/it]


max # nodes = 199 || mean # nodes = 149.5
max # edges = 780 || mean # edges = 582.0


100%|██████████| 1/1 [00:25<00:00, 25.84s/it]


max # nodes = 32 || mean # nodes = 32.0
max # edges = 170 || mean # edges = 162.574




max # nodes = 64 || mean # nodes = 64.0
max # edges = 347 || mean # edges = 335.038
max # nodes = 115 || mean # nodes = 115.0
max # edges = 614 || mean # edges = 593.484
max # nodes = 199 || mean # nodes = 149.5
max # edges = 199 || mean # edges = 149.5
max # nodes = 199 || mean # nodes = 149.5
max # edges = 780 || mean # edges = 582.0


In [9]:
# Show the collected MMD statistics, one row per evaluated run.
result_df


Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
0,community2,GRANMixtureBernoulli,5,0.023199,0.252587,0.1925285,0.08545,0.025109,0.326359,0.2204202,0.08566
1,community4,GRANMixtureBernoulli,5,0.010807,0.085846,0.5667613,0.024514,0.009778,0.08306,0.5280681,0.022718
2,community8,GRANMixtureBernoulli,5,0.021233,0.070621,0.5788287,0.006138,0.021949,0.070023,0.5427156,0.006804
3,watts,GRANMixtureBernoulli,5,0.128493,0.030957,0.001776086,0.177668,0.121638,0.032091,0.001825216,0.204885
4,barabasi,GRANMixtureBernoulli,15,0.045103,0.111198,0.1334068,0.013415,0.047357,0.234155,0.1538364,0.013156
5,barabasi,GRANMixtureBernoulli,50,0.025339,0.079182,0.1533513,0.014531,0.023334,0.223869,0.1114944,0.014782
6,community2,RNN,1000,0.042362,0.234516,0.3248894,0.107387,0.043783,0.308299,0.326366,0.110081
7,community4,RNN,1000,0.018449,0.067304,0.608483,0.028228,0.018187,0.064511,0.5924443,0.028171
8,community8,RNN,1000,0.062338,0.057058,0.6888835,0.05954,0.059766,0.055865,0.6393324,0.059826
9,watts,RNN,1000,1.6e-05,0.000261,7.397017e-08,0.011981,6e-06,2.1e-05,2.513227e-07,0.030172


In [10]:
# Same table, rendered through the DataFrame's `.style` accessor.
result_df.style

Unnamed: 0,dataset_name,model_name,num_epochs,mmd_degree_dev,mmd_clustering_dev,mmd_4orbits_dev,mmd_spectral_dev,mmd_degree_test,mmd_clustering_test,mmd_4orbits_test,mmd_spectral_test
0,community2,GRANMixtureBernoulli,5,0.023199,0.252587,0.192528,0.08545,0.025109,0.326359,0.22042,0.08566
1,community4,GRANMixtureBernoulli,5,0.010807,0.085846,0.566761,0.024514,0.009778,0.08306,0.528068,0.022718
2,community8,GRANMixtureBernoulli,5,0.021233,0.070621,0.578829,0.006138,0.021949,0.070023,0.542716,0.006804
3,watts,GRANMixtureBernoulli,5,0.128493,0.030957,0.001776,0.177668,0.121638,0.032091,0.001825,0.204885
4,barabasi,GRANMixtureBernoulli,15,0.045103,0.111198,0.133407,0.013415,0.047357,0.234155,0.153836,0.013156
5,barabasi,GRANMixtureBernoulli,50,0.025339,0.079182,0.153351,0.014531,0.023334,0.223869,0.111494,0.014782
6,community2,RNN,1000,0.042362,0.234516,0.324889,0.107387,0.043783,0.308299,0.326366,0.110081
7,community4,RNN,1000,0.018449,0.067304,0.608483,0.028228,0.018187,0.064511,0.592444,0.028171
8,community8,RNN,1000,0.062338,0.057058,0.688883,0.05954,0.059766,0.055865,0.639332,0.059826
9,watts,RNN,1000,1.6e-05,0.000261,0.0,0.011981,6e-06,2.1e-05,0.0,0.030172
