---
# Demo for the use case of graphs with various attribute correlations
---

This notebook shows:
1. how users can generate graphs with various attribute correlations, and
2. how users can execute GNNs on the generated graphs.


In [1]:
!pip install powerlaw
!pip install jgraph
!pip install tensorflow-gpu==1.15.5

Collecting powerlaw
  Downloading powerlaw-1.5-py3-none-any.whl (24 kB)
Installing collected packages: powerlaw
Successfully installed powerlaw-1.5
Collecting jgraph
  Downloading jgraph-0.2.1-py2.py3-none-any.whl (119 kB)
[K     |████████████████████████████████| 119 kB 7.0 MB/s 
Installing collected packages: jgraph
Successfully installed jgraph-0.2.1
Collecting tensorflow-gpu==1.15.5
  Downloading tensorflow_gpu-1.15.5-cp37-cp37m-manylinux2010_x86_64.whl (411.0 MB)
[K     |████████████████████████████████| 411.0 MB 25 kB/s 
Collecting h5py<=2.10.0
  Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 56.2 MB/s 
Collecting tensorboard<1.16.0,>=1.15.0
  Downloading tensorboard-1.15.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 55.6 MB/s 
Collecting keras-applications>=1.0.8
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 9.0

In [2]:
# GenCAT project folder on the mounted drive, in the form used from Python.
drive_root = "/content/drive/My Drive/Colab Notebooks/GenCAT/"
# Project folder in the form used inside `!` shell commands: the space is
# escaped with a backslash for the shell. A raw string keeps that backslash
# literal without relying on Python passing through the unrecognised escape
# sequence "\ " (which triggers an invalid-escape warning).
# NOTE(review): this path uses "MyDrive" while drive_root uses "My Drive";
# presumably both name the same folder — confirm.
drive_root_command = r"/content/drive/MyDrive/Colab\ Notebooks/GenCAT/"

import sys
import os.path

# Put drive_root on the module search path before the project imports that
# follow. The membership check keeps sys.path free of duplicates when the cell
# is re-run.
if drive_root not in sys.path:
  sys.path.append(drive_root)
import numpy as np
import time
import pandas as pd

import gencat
from func import cpm_cpd_plot, att_plot, loadData, _load_data, save_graph, compute_graph_statistics, feature_extraction

# Load Cora dataset

In [3]:
# Name of the dataset to load; it is also embedded in the names of the
# datasets generated further down.
dataset_str = "cora"
# _load_data is imported from the project's func module. Its three return
# values are later passed to feature_extraction
# (presumably adjacency matrix, node features and class labels — TODO confirm).
adj, features, labels = _load_data(dataset_str)

# Generate Variants of Attribute-Class Correlations
where $\beta$ indicates a scaling parameter for attribute-class correlation. 
If $\beta$ is large, nodes have more non-zero attribute values, i.e., the attributes should be more informative to predict node labels. 

In [4]:
### attribute variants
import copy
# Infix inserted into generated dataset names, between the base name and beta.
tag = "_att_variants_"
# Values of beta to sweep. -1 is a sentinel rather than a real scaling factor:
# the generation loop treats it as the "complete random" case.
beta_l =[256,128,64,32,16,8,4,2,1,0,-1]

In [None]:
# Generate three graphs for every beta in beta_l and export each one.
#
# For beta != -1 the matrix H returned by feature_extraction is blended with
# its own mean:
#     H_ = (H + beta * mean(H)) / (beta + 1)
# so beta = 0 leaves H unchanged and a larger beta pulls every entry towards
# mean(H). beta = -1 would divide by zero, so it is handled separately as the
# "complete random" case in which every entry equals mean(H).
# NOTE(review): under this formula a larger beta flattens H; confirm that this
# matches the meaning of beta described in the text above this cell.
# NOTE(review): out_planetoid is neither defined nor imported in the visible
# cells; it must be brought into scope before this cell can run.

# The extracted statistics depend only on the loaded dataset, so they are
# computed once instead of once per beta.
M,D,class_size,H,theta = feature_extraction(adj,features,labels)
H_mean = np.average(H)

for beta in beta_l:
  if beta != -1:
    # Assuming H is a NumPy array, this arithmetic builds a new array and
    # leaves H itself untouched, so no explicit copy is needed.
    H_ = (H + beta * H_mean) / (beta + 1)
  else: # complete random
    print("random!")
    H_ = np.zeros(H.shape) + H_mean

  tag_ = tag + str(beta) + "_"
  # A named index is used instead of `_` because the value ends up in the
  # dataset name, and `_` is IPython's last-output variable.
  for graph_idx in range(3):
    S,X,Label = gencat.gencat(M,D,H_,class_size=class_size,theta=theta,att_type="Bernoulli")
    out_planetoid(S,X,Label,dataset_str="GenCAT_"+dataset_str+tag_+str(graph_idx))

# Execution of Graph Analytic Methods
+ GCN (ICLR 2017)
+ GAT (ICLR 2018)
+ H2GCN (NeurIPS 2020)

# GCN

In [5]:
data = "GenCAT_cora"

method = drive_root_command + "gcn-master/gcn/train.py"

for beta in beta_l:
  data_ = data + tag + str(beta) + "_"
# for _ in [9]:
# for _ in range(10):
  # data_ = data + tag + str(_) + "_" 
  # for dataIter in range(3): 
  for dataIter in [0]: 
    data_in = data_ + str(dataIter)
    print(data_in)
    # for i in range(3):
    for i in [0]:
      start = time.time()
      # magic command
      !python $method --dataset $data_in --iter_count $i
      elapsed_time = time.time() - start
      print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")

GenCAT_cora_att_variants_256_0
1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:13.772454738616943[sec]
Optimization Finished!
Test set results: cost= 0.98289 accuracy= 0.80961 time= 0.00457
elapsed_time:23.98886013031006[sec]
GenCAT_cora_att_variants_128_0
1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:12.69774341583252[sec]
Optimization Finished!
Test set results: cost= 1.03920 accuracy= 0.77264 time= 0.00458
elapsed_time:18.975985765457153[sec]
GenCAT_cora_att_variants_64_0
1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:13.93307876586914[sec]
Optimization Finished!
Test set results: cost= 0.95594 accuracy= 0.82440 time= 0.00457
elapsed_time:20.067970514297485[sec]
GenCAT_cora_att_variants_32_0
1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:10.583963871002197[sec]
Optimization Finished!
Test set results: cost= 1.06635 accuracy= 0.78928 time= 0.00444
elapsed_time:16.661468267440796[sec]
GenCAT_cora_att_variants_16_0
1 Physi

# GAT

In [4]:
data = "GenCAT_cora"

tag = "_att_variants_"
method = drive_root_command + "GAT-master/execute_cora_sparse.py"

for beta in beta_l:
# for beta in [256]:
  data_ = data + tag + str(beta) + "_" 
  # for dataIter in range(3):
  for dataIter in [0]: 
    data_in = data_ + str(dataIter)
    print(data_in)
    # for i in range(1):
    for i in [0]:
      start = time.time()
      !python $method --dataset $data_in --iter_count $i --l2_coef 5e-5 
      elapsed_time = time.time() - start
      print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")

GenCAT_cora_att_variants_256_0
1 Physical GPUs, 1 Logical GPUs
Early stop! Min loss:  0.5915384292602539 , Max accuracy:  0.8479261994361877
Early stop model validation loss:  0.5915384292602539 , accuracy:  0.8479261994361877
Test loss: 0.6009402275085449 ; Test accuracy: 0.831792950630188
elapsed_time:230.51469373703003[sec]
elapsed_time:233.72308087348938[sec]
GenCAT_cora_att_variants_128_0
1 Physical GPUs, 1 Logical GPUs
Early stop! Min loss:  0.6496205925941467 , Max accuracy:  0.8341013193130493
Early stop model validation loss:  0.6579115390777588 , accuracy:  0.828341007232666
Test loss: 0.6367918848991394 ; Test accuracy: 0.8317928910255432
elapsed_time:248.34567284584045[sec]
elapsed_time:251.7947063446045[sec]
GenCAT_cora_att_variants_64_0
1 Physical GPUs, 1 Logical GPUs
Early stop! Min loss:  0.6081442832946777 , Max accuracy:  0.8605990409851074
Early stop model validation loss:  0.6261594891548157 , accuracy:  0.8571428656578064
Test loss: 0.5899671316146851 ; Test accura

# H2GCN 
This method requires a newer version of TensorFlow (2.0.0) than GCN and GAT, which are run with 1.15.5 above.

In [None]:
!pip install tensorflow-gpu==2.0.0

Collecting tensorflow-gpu==2.0.0
  Downloading tensorflow_gpu-2.0.0-cp37-cp37m-manylinux2010_x86_64.whl (380.8 MB)
[K     |████████████████████████████████| 380.8 MB 40 kB/s 
Collecting tensorboard<2.1.0,>=2.0.0
  Downloading tensorboard-2.0.2-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 53.3 MB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0
  Downloading tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449 kB)
[K     |████████████████████████████████| 449 kB 65.5 MB/s 
Installing collected packages: tensorflow-estimator, tensorboard, tensorflow-gpu
  Attempting uninstall: tensorflow-estimator
    Found existing installation: tensorflow-estimator 1.15.1
    Uninstalling tensorflow-estimator-1.15.1:
      Successfully uninstalled tensorflow-estimator-1.15.1
  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 1.15.0
    Uninstalling tensorboard-1.15.0:
      Successfully uninstalled tensorboard-1.15.0
  Attempting uninstall:

In [None]:
data = "GenCAT_cora"

data_ind = data
dataset_path = drive_root_command + "datasets"

tag = "_att_variants_"
beta_l =[256,128,64,32,16,8,4,2,1,0,-1]

method = drive_root_command + "H2GCN-master/h2gcn/run_experiments.py"


# for _ in [9]:
for beta in beta_l:
  data_ = data_ind + tag + str(beta) + "_"
# for _ in range(10):
  # data_in = data_ + tag + str(_) + "_"
  for dataIter in range(3):
  # for dataIter in [0]: 
    data_in = data_ + str(dataIter)
    print(data_in)
    for i in range(3):
    # for i in [1,2]:
      start = time.time()
      !python $method H2GCN planetoid --dataset $data_in --iter_count $i
      #  --dataset_path $dataset_path
      # elapsed_time = time.time() - start
      # print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")

GenCAT_cora_att_variants_256_0
1 Physical GPUs, 1 Logical GPUs
Model: "h2gcn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sparse_dense (SparseDense)   multiple                  91712     
_________________________________________________________________
re_lu (ReLU)                 multiple                  0         
_________________________________________________________________
gcn_layer (GCNLayer)         multiple                  0         
_________________________________________________________________
flatten (Flatten)            multiple                  0         
_________________________________________________________________
gcn_layer_1 (GCNLayer)       multiple                  0         
_________________________________________________________________
flatten_1 (Flatten)          multiple                  0         
________________________________________________________________