In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): none of the visible cells read from the mount — confirm it is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install networkx



In [None]:
# Install the pinned StellarGraph release together with its "demos" extras.
%pip install -q stellargraph[demos]==1.2.1

[K     |████████████████████████████████| 435 kB 14.7 MB/s 
[K     |████████████████████████████████| 482 kB 33.6 MB/s 
[K     |████████████████████████████████| 462 kB 58.2 MB/s 
[K     |████████████████████████████████| 41 kB 384 kB/s 
[?25h  Building wheel for mplleaflet (setup.py) ... [?25l[?25hdone


In [None]:
import stellargraph as sg

In [None]:
import pandas as pd
import os

import stellargraph as sg
from stellargraph.mapper import FullBatchNodeGenerator
from stellargraph.layer import GCN

from tensorflow.keras import layers, optimizers, losses, metrics, Model
from sklearn import preprocessing, model_selection
from IPython.display import display, HTML
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

%matplotlib inline

#### Loading CORA Dataset

In [None]:
# Create the Cora dataset loader and render its description as HTML.
dataset = sg.datasets.Cora()
display(HTML(dataset.description))
# G is the citation graph; node_subjects holds each node's subject label,
# which is used as the classification target in the cells below.
G, node_subjects = dataset.load()

In [None]:
# Print the graph summary (node/edge counts, node and edge types, feature size).
print(G.info())

StellarGraph: Undirected multigraph
 Nodes: 2708, Edges: 5429

 Node types:
  paper: [2708]
    Features: float32 vector, length 1433
    Edge types: paper-cites->paper

 Edge types:
    paper-cites->paper: [5429]
        Weights: all 1 (default)
        Features: none


In [None]:
# Class distribution over all nodes (number of papers per subject).
node_subjects.value_counts().to_frame()

Unnamed: 0,subject
Neural_Networks,818
Probabilistic_Methods,426
Genetic_Algorithms,418
Theory,351
Case_Based,298
Reinforcement_Learning,217
Rule_Learning,180


#### Preparing train, validation, and test data

In [None]:
# Fixed seed so the stratified splits — and therefore every accuracy reported
# further down — are reproducible across kernel restarts.
SPLIT_SEED = 42

# 140 nodes for training, stratified by subject; everything else is held out.
train_subjects, remaining_subjects = model_selection.train_test_split(
    node_subjects,
    train_size=140,
    test_size=None,
    stratify=node_subjects,
    random_state=SPLIT_SEED,
)
# From the held-out nodes: 500 for validation, the rest for testing.
val_subjects, test_subjects = model_selection.train_test_split(
    remaining_subjects,
    train_size=500,
    test_size=None,
    stratify=remaining_subjects,
    random_state=SPLIT_SEED,
)

In [None]:
# Class distribution within the training split.
train_subjects.value_counts().to_frame()

Unnamed: 0,subject
Neural_Networks,42
Probabilistic_Methods,22
Genetic_Algorithms,22
Theory,18
Case_Based,16
Reinforcement_Learning,11
Rule_Learning,9


In [None]:
# Binarize the subject labels into target columns for the softmax classifier.
target_encoding = preprocessing.LabelBinarizer()

# Fit the encoder on the training labels only, then reuse the same mapping
# for the validation and test labels.
train_targets = target_encoding.fit_transform(train_subjects)
val_targets = target_encoding.transform(val_subjects)
test_targets = target_encoding.transform(test_subjects)

In [None]:
# Full-batch node generator over the whole graph, configured for the GCN method.
generator = FullBatchNodeGenerator(G, method="gcn")
# One flow per split: each pairs that split's node ids with its encoded targets.
train_gen = generator.flow(train_subjects.index, train_targets)
val_gen = generator.flow(val_subjects.index, val_targets)
test_gen = generator.flow(test_subjects.index, test_targets)

Using GCN (local pooling) filters...


#### Hyperparameter tuning

In [None]:
# Search space for the GCN grid search: every combination of learning rate,
# layer configuration and dropout rate below is trained once.

# Adam learning rates.
lrGCN = [
    0.005,
    0.01,
]

# GCN layer sizes, grouped by network depth.
layersGCN = [
    # one layer
    [16],
    [32],
    [64],
    [128],
    # two layers
    [16, 16],
    [32, 32],
    [64, 64],
    # three layers
    [16, 16, 16],
    [32, 32, 32],
    [64, 64, 64],
]

# Dropout rates.
dropoutGCN = [
    0.2,
    0.5,
    0.7,
]

In [None]:
import tensorflow as tf
import pandas as pd

# Empty results table for the hyperparameter search; columns mirror the record
# built in getResults (learning rate, layer sizes, dropout, validation accuracy).
df_results = pd.DataFrame({'LR' : [], 'Layers': [], 'Dropout': [], 'ValAcc': []})

In [None]:
def getResults(lr, layer, dropout, epochs=50, patience=50):
  """Train one GCN configuration and record its validation accuracy.

  Builds a GCN on the notebook-level ``generator`` with ReLU on every layer,
  adds a softmax Dense head, trains on ``train_gen`` while validating on
  ``val_gen``, appends one row to the notebook-level ``df_results`` table and
  prints a short summary. The Keras session is cleared before returning.

  Args:
    lr: Learning rate for the Adam optimizer.
    layer: List of GCN layer sizes, e.g. ``[32, 32]``.
    dropout: Dropout rate passed to ``GCN``.
    epochs: Maximum number of training epochs (default 50).
    patience: Early-stopping patience on ``val_acc`` (default 50).

  Returns:
    The validation accuracy reported by ``model.evaluate(val_gen)``.
  """
  # The results table is rebound below, so it must be declared global.
  global df_results

  # One ReLU activation per GCN layer.
  activation = ["relu"] * len(layer)

  gcn = GCN(
    layer_sizes=layer, activations=activation, generator=generator, dropout=dropout
  )
  x_inp, x_out = gcn.in_out_tensors()

  # Softmax head with one unit per target column.
  predictions = layers.Dense(units=train_targets.shape[1], activation="softmax")(x_out)
  model = Model(inputs=x_inp, outputs=predictions)

  model.compile(
      # `learning_rate` is the supported keyword; `lr` is deprecated and warns.
      optimizer=optimizers.Adam(learning_rate=lr),
      loss=losses.categorical_crossentropy,
      metrics=["acc"],
  )

  # NOTE(review): with the defaults, patience == epochs, so early stopping can
  # never trigger before training ends; whether the best weights are restored
  # in that case presumably depends on the Keras version — confirm.
  es_callback = EarlyStopping(
      monitor="val_acc", patience=patience, restore_best_weights=True
  )

  model.fit(
      train_gen,
      epochs=epochs,
      validation_data=val_gen,
      verbose=0,
      shuffle=False,  # this should be False, since shuffling data means shuffling the whole graph
      callbacks=[es_callback],
  )

  # Evaluate once; index 0 is the loss, index 1 is the "acc" metric.
  val_acc = model.evaluate(val_gen)[1]

  # DataFrame.append returned a new frame (and is removed in pandas 2.0), so
  # the original call silently discarded every row. Rebind the table instead.
  record = {'LR': lr, 'Layers': layer, 'Dropout': dropout, 'ValAcc': val_acc}
  df_results = pd.concat([df_results, pd.DataFrame([record])], ignore_index=True)

  print("LR: {}, Layers: {}, Dropout: {}".format(lr, layer, dropout))
  print("Validation accuracy: ", val_acc)

  # Release the graph/session state so repeated calls do not accumulate models.
  tf.keras.backend.clear_session()

  return val_acc

In [None]:
# Exhaustive grid search: train one model per (learning rate, layers, dropout)
# combination, in the same nesting order as the three grids are declared.
iteration = 1
for rate, layer, drop in (
    (r, s, d) for r in lrGCN for s in layersGCN for d in dropoutGCN
):
  print("Iteration : {}".format(iteration))
  getResults(rate, layer, drop)
  iteration += 1

Iteration : 1


  super(Adam, self).__init__(name, **kwargs)


LR: 0.005, Layers: [16], Dropout: 0.2
Validation accuracy:  0.7519999742507935
Iteration : 2
LR: 0.005, Layers: [16], Dropout: 0.5
Validation accuracy:  0.7680000066757202
Iteration : 3
LR: 0.005, Layers: [16], Dropout: 0.7
Validation accuracy:  0.7720000147819519
Iteration : 4
LR: 0.005, Layers: [32], Dropout: 0.2
Validation accuracy:  0.7760000228881836
Iteration : 5
LR: 0.005, Layers: [32], Dropout: 0.5
Validation accuracy:  0.7820000052452087
Iteration : 6
LR: 0.005, Layers: [32], Dropout: 0.7
Validation accuracy:  0.7720000147819519
Iteration : 7
LR: 0.005, Layers: [64], Dropout: 0.2
Validation accuracy:  0.7900000214576721
Iteration : 8
LR: 0.005, Layers: [64], Dropout: 0.5
Validation accuracy:  0.800000011920929
Iteration : 9
LR: 0.005, Layers: [64], Dropout: 0.7
Validation accuracy:  0.800000011920929
Iteration : 10
LR: 0.005, Layers: [128], Dropout: 0.2
Validation accuracy:  0.7860000133514404
Iteration : 11
LR: 0.005, Layers: [128], Dropout: 0.5
Validation accuracy:  0.800000

In [None]:
# Train the final model with a fixed configuration.
# NOTE(review): [32, 32] / dropout 0.7 / learning rate 0.005 were presumably
# picked from the grid search above — confirm against the recorded results.
gcn = GCN(
  layer_sizes=[32, 32], activations=["relu", "relu"], generator=generator, dropout=0.7
)

x_inp, x_out = gcn.in_out_tensors()

# Softmax head with one unit per target column.
predictions = layers.Dense(units=train_targets.shape[1], activation="softmax")(x_out)
model = Model(inputs=x_inp, outputs=predictions)

model.compile(
    # `learning_rate` is the supported keyword; `lr` is deprecated and warns.
    optimizer=optimizers.Adam(learning_rate=0.005),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)

# Stop once val_acc has not improved for 50 epochs and keep the best weights.
es_callback = EarlyStopping(monitor="val_acc", patience=50, restore_best_weights=True)

history = model.fit(
    train_gen,
    epochs=200,
    validation_data=val_gen,
    verbose=0,
    shuffle=False,  # this should be False, since shuffling data means shuffling the whole graph
    callbacks=[es_callback],
)

# sg.utils.plot_history(history)

# Evaluate once and reuse the result; index 0 is the loss, index 1 is "acc".
val_metrics = model.evaluate(val_gen)
val_acc = val_metrics[1]


  super(Adam, self).__init__(name, **kwargs)




In [None]:
# Accuracy of the final model on the held-out test split (index 1 is the "acc" metric).
test_acc = model.evaluate(test_gen)[1]

