---
# Demo: Use Case of Graphs with Various Class Preference Means
---

We show 
1. how users can generate graphs with various class preference means, 
2. how users can execute GNNs on the generated graphs.


In [1]:
!pip install powerlaw
!pip install jgraph
!pip install tensorflow-gpu==1.15.5

Collecting powerlaw
  Downloading powerlaw-1.5-py3-none-any.whl (24 kB)
Installing collected packages: powerlaw
Successfully installed powerlaw-1.5
Collecting jgraph
  Downloading jgraph-0.2.1-py2.py3-none-any.whl (119 kB)
[K     |████████████████████████████████| 119 kB 5.4 MB/s 
Installing collected packages: jgraph
Successfully installed jgraph-0.2.1


In [3]:
# Project folder on the mounted Google Drive, as a plain Python path
# (used for sys.path and passed to Python helpers).
drive_root = "/content/drive/My Drive/Colab Notebooks/GenCAT/"
# Project folder for use inside `!` shell commands: the space is escaped with a
# backslash for the shell. The backslash itself is written as `\\` because
# `\ ` is not a valid escape sequence in a normal Python string literal.
# NOTE(review): this uses "MyDrive" while drive_root uses "My Drive" —
# presumably both names point at the same mount; confirm.
drive_root_command = "/content/drive/MyDrive/Colab\\ Notebooks/GenCAT/"

import sys
import os.path
# Make the project's modules under drive_root importable (gencat, func, ...).
sys.path.append(drive_root)
import numpy as np
import time
import pandas as pd

import gencat
from func import cpm_cpd_plot, att_plot, loadData, _load_data, save_graph, compute_graph_statistics, feature_extraction

# Load Cora dataset

In [None]:
# Name of the dataset to load; it is also reused later to build the names of
# the generated graphs.
dataset_str = "cora"
# Load the dataset through the project helper `_load_data` (imported from func).
# NOTE(review): presumably returns the adjacency matrix, node features and node
# labels, judging by the variable names — confirm against func._load_data.
adj, features, labels = _load_data(dataset_str)
# Alternative loader, currently disabled:
# adj, features, labels = loadData(path=drive_root+"/pyGAT-master/data/cora/")

# Generate Variants of Class Preference Mean/Deviation

In [None]:
# Infix inserted into the name of every generated graph; the dataset names used
# in this notebook have the form "GenCAT_<dataset><tag><x>_<n>".
tag = "_cpm_variants_"
import copy

In [None]:
def config_class_preference_mean(M,D,x=1):
  """Shift the class preference mean off the diagonal and rescale the deviation.

  For every row ``i`` of the ``k x k`` mean matrix, ``0.1 * x`` is subtracted
  from the diagonal entry and the same total amount is spread evenly over the
  ``k - 1`` off-diagonal entries (``(0.1 * x) / (k - 1)`` each). Entries that
  become negative are reset to 0.01. Each deviation entry is then multiplied by
  ``shifted mean / (shifted mean with the shift undone)``, so the deviation
  scales in proportion to the change of its mean. Finally every row of the
  shifted mean is normalised to sum to 1 and non-positive deviations are set
  to 0.

  Args:
    M: square class preference mean; must expose ``.shape``, ``M[i][j]``
      indexing and boolean-mask assignment (e.g. a float ``numpy.ndarray``).
    D: class preference deviation with the same layout as ``M``.
    x: shift strength in steps of 0.1. ``x == 0`` returns unmodified copies.

  Returns:
    ``(M_, D_)``: modified deep copies. ``M`` and ``D`` are left untouched.
  """
  import copy
  k = M.shape[0] # number of classes
  M_ = copy.deepcopy(M)
  D_ = copy.deepcopy(D)
  if x != 0:
    shift = 0.1 * x
    # Share of the shift received by each off-diagonal entry of a row.
    # With a single class there are no off-diagonal entries to receive it.
    spread = shift / (k-1) if k > 1 else 0.0

    for i in range(k):
      for j in range(k):
        if i == j:
          M_[i][j] -= shift
        else:
          M_[i][j] += spread
      # Keep the means strictly positive after the shift.
      M_[M_<0] = 0.01

    for i in range(k):
      for j in range(k):
        # Undo the shift to recover the mean this entry had before.
        # The off-diagonal case must use the same x-scaled amount that was
        # added above (previously it used 0.1/(k-1) regardless of x).
        if i == j:
          before = M_[i][j] + shift
        else:
          before = M_[i][j] - spread
        # A zero previous mean gives no meaningful ratio; leave the deviation
        # as is instead of producing inf/NaN.
        if before != 0:
          D_[i][j] = D_[i][j] * (M_[i][j] / before)

    for i in range(k):
      M_[i] = M_[i] / sum(M_[i])
    D_[D_<=0] = 0
  return M_, D_

In [None]:
# Generate graphs for nine shift strengths (x = 0..8), three graphs per setting.
for x in range(9):
  # Statistics extracted from the loaded dataset; they parameterise the generator.
  # They are re-extracted on every iteration, as before, so each setting starts
  # from fresh values even if a downstream call were to modify them in place.
  M,D,class_size,H,theta = feature_extraction(adj,features,labels)
  tag_ = tag + str(x) + "_"

  # Class preference mean/deviation configured for this shift strength.
  M_, D_ = config_class_preference_mean(M,D,x=x)

  # A named index is used instead of `_`, which was both the graph number in
  # the dataset name and the throw-away target for the plot's return value.
  for graph_idx in range(3):
    S,X,Label = gencat.gencat(M_,D_,H,class_size=class_size,theta=theta,att_type="Bernoulli")
    # Stored under the name "GenCAT_<dataset><tag><x>_<graph_idx>", which is the
    # name the training cells below pass as --dataset.
    # NOTE(review): out_planetoid is not imported or defined in any visible
    # cell — confirm where it comes from, otherwise this raises NameError on a
    # fresh kernel.
    out_planetoid(S,X,Label,dataset_str="GenCAT_"+dataset_str+tag_+str(graph_idx))
    cpm_cpd_plot(S,Label)

Output hidden; open in https://colab.research.google.com to view.

# Execution of Graph Analytic Methods
+ GCN (ICLR 2017)
+ GAT (ICLR 2018)
+ H2GCN (NeurIPS 2020)
+ MLP (a graph-agnostic baseline)

# GCN

In [None]:
data = "GenCAT_cora"

method = drive_root_command + "gcn-master/gcn/train.py"

for _ in range(9):
  data_ = data + tag + str(_) + "_" 
  for dataIter in range(3):
    data_in = data_ + str(dataIter)
    for i in range(3):
      start = time.time()
      # magic command
      !python $method --dataset $data_in --iter_count $i
      elapsed_time = time.time() - start
      print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")

1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:9.600191354751587[sec]
Optimization Finished!
Test set results: cost= 0.71709 accuracy= 0.86506 time= 0.00429
elapsed_time:19.10181212425232[sec]
1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:9.111247301101685[sec]
Optimization Finished!
Test set results: cost= 0.68793 accuracy= 0.86137 time= 0.00434
elapsed_time:12.166223764419556[sec]
1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:11.28701114654541[sec]
Optimization Finished!
Test set results: cost= 0.64865 accuracy= 0.88355 time= 0.00462
elapsed_time:14.271116256713867[sec]
1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:10.592635869979858[sec]
Optimization Finished!
Test set results: cost= 0.66415 accuracy= 0.90573 time= 0.00489
elapsed_time:13.57239580154419[sec]
1 Physical GPUs, 1 Logical GPUs
Early stopping...
elapsed_time:10.627156019210815[sec]
Optimization Finished!
Test set results: cost= 0.64823 accuracy= 0.90203 t

# GAT

In [None]:
data = "GenCAT_cora"

tag = "_cpm_variants_"
method = drive_root_command + "GAT-master/execute_cora_sparse.py"

for _ in range(9):
  data_ = data + tag + str(_) + "_" 
  for dataIter in range(3):
    data_in = data_ + str(dataIter)
    print(data_in)
    for i in range(3):
      start = time.time()
      !python $method --dataset $data_in --iter_count $i --l2_coef 5e-5 
      elapsed_time = time.time() - start
      print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")

GenCAT_cora_cpm_variants_0_0
1 Physical GPUs, 1 Logical GPUs
Early stop! Min loss:  0.3971219062805176 , Max accuracy:  0.8986175656318665
Early stop model validation loss:  0.4032749533653259 , accuracy:  0.8963133692741394
Test loss: 0.39871200919151306 ; Test accuracy: 0.8853973150253296
elapsed_time:217.40462636947632[sec]
elapsed_time:224.00876879692078[sec]
1 Physical GPUs, 1 Logical GPUs
Early stop! Min loss:  0.39609503746032715 , Max accuracy:  0.9020736217498779
Early stop model validation loss:  0.45886364579200745 , accuracy:  0.8951612710952759
Test loss: 0.393679678440094 ; Test accuracy: 0.8853973150253296
elapsed_time:223.02576303482056[sec]
elapsed_time:226.243727684021[sec]
GenCAT_cora_cpm_variants_0_1
1 Physical GPUs, 1 Logical GPUs
Early stop! Min loss:  0.36411091685295105 , Max accuracy:  0.9101381301879883
Early stop model validation loss:  0.3656434416770935 , accuracy:  0.9101381301879883
Test loss: 0.38164958357810974 ; Test accuracy: 0.9057300090789795
elapse

# H2GCN 
This method requires a newer version of TensorFlow (2.0.0) than the one used for GCN and GAT (1.15.5).

In [None]:
# Switch the runtime to TensorFlow 2.0.0 for H2GCN. As the captured output
# shows, this uninstalls the 1.15-series packages installed at the top of the
# notebook for GCN and GAT.
# NOTE(review): re-running the GCN/GAT cells after this presumably requires
# reinstalling tensorflow-gpu==1.15.5 first — confirm.
!pip install tensorflow-gpu==2.0.0

Collecting tensorflow-gpu==2.0.0
  Downloading tensorflow_gpu-2.0.0-cp37-cp37m-manylinux2010_x86_64.whl (380.8 MB)
[K     |████████████████████████████████| 380.8 MB 42 kB/s 
Collecting tensorflow-estimator<2.1.0,>=2.0.0
  Downloading tensorflow_estimator-2.0.1-py2.py3-none-any.whl (449 kB)
[K     |████████████████████████████████| 449 kB 66.1 MB/s 
Collecting tensorboard<2.1.0,>=2.0.0
  Downloading tensorboard-2.0.2-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 36.5 MB/s 
Installing collected packages: tensorflow-estimator, tensorboard, tensorflow-gpu
  Attempting uninstall: tensorflow-estimator
    Found existing installation: tensorflow-estimator 1.15.1
    Uninstalling tensorflow-estimator-1.15.1:
      Successfully uninstalled tensorflow-estimator-1.15.1
  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 1.15.0
    Uninstalling tensorboard-1.15.0:
      Successfully uninstalled tensorboard-1.15.0
  Attempting uninstall:

In [5]:
data = "GenCAT_cora"

data_ind = data
dataset_path = drive_root_command + "datasets"

tag = "_cpm_variants_"
method = drive_root_command + "H2GCN-master/h2gcn/run_experiments.py"

for _ in range(9):
  data_ = data_ind + tag + str(_) + "_"
  print(data_)
  for dataIter in range(3):
    data_in = data_ + str(dataIter)
    for i in range(3):
      !python $method H2GCN planetoid --dataset $data_in --iter_count $i 

GenCAT_cora_cpm_variants_0_
1 Physical GPUs, 1 Logical GPUs
Model: "h2gcn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sparse_dense (SparseDense)   multiple                  91712     
_________________________________________________________________
re_lu (ReLU)                 multiple                  0         
_________________________________________________________________
gcn_layer (GCNLayer)         multiple                  0         
_________________________________________________________________
flatten (Flatten)            multiple                  0         
_________________________________________________________________
gcn_layer_1 (GCNLayer)       multiple                  0         
_________________________________________________________________
flatten_1 (Flatten)          multiple                  0         
_________________________________________________________________
c

---
# MLP

In [None]:
from mlp import run_mlp

In [None]:
tag = '_cpm_variants_'
data = "GenCAT_cora"
# Sweep: 9 shift settings, 3 generated graphs each, 3 MLP runs per graph.
for variant in range(9):
  data_ = "".join([data, tag, str(variant), "_"])
  for graph_idx in range(3):
    data_in = data_ + str(graph_idx)
    for run in range(3):
      # The run number is forwarded to the helper as iter_count.
      run_mlp(data_in, drive_root, iter_count=run)

accuracy:  0.6765249537892791
elapsed_time:2.2596356868743896[sec]
accuracy:  0.66728280961183
elapsed_time:2.497260093688965[sec]
accuracy:  0.6524953789279113
elapsed_time:2.421661376953125[sec]
accuracy:  0.6654343807763401
elapsed_time:1.9705004692077637[sec]
accuracy:  0.6654343807763401
elapsed_time:1.7358062267303467[sec]
accuracy:  0.6728280961182994
elapsed_time:2.4189770221710205[sec]
accuracy:  0.6451016635859519
elapsed_time:1.9743497371673584[sec]
accuracy:  0.634011090573013
elapsed_time:2.3825414180755615[sec]
accuracy:  0.6377079482439926
elapsed_time:2.005021810531616[sec]
accuracy:  0.6303142329020333
elapsed_time:1.8889427185058594[sec]
accuracy:  0.6081330868761553
elapsed_time:2.103684663772583[sec]
accuracy:  0.6192236598890942
elapsed_time:1.964792013168335[sec]
accuracy:  0.6524953789279113
elapsed_time:1.9030156135559082[sec]
accuracy:  0.6635859519408502
elapsed_time:1.8460886478424072[sec]
accuracy:  0.6524953789279113
elapsed_time:1.7407021522521973[sec]
acc