In [1]:
# For colab users
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from tensorflow.keras import Sequential
from tensorflow.keras import layers as ly
from tensorflow.keras import Model

In [4]:
#cora_content = pd.read_csv('/content/drive/MyDrive/TA/NPEX4기/0804/cora/cora.content', sep='\t', header=None)
cora_content = pd.read_csv('./cora/cora.content', sep='\t', header=None)
cora_content.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434
0,31336,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Neural_Networks
1,1061127,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,Rule_Learning
2,1106406,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Reinforcement_Learning
3,13195,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Reinforcement_Learning
4,37879,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Probabilistic_Methods


In [5]:
ids = cora_content[0].values # paper(node) ids
vecs = cora_content[cora_content.columns[1:1434]].values # node features
labels = cora_content[1434].values # node label

print(np.unique(labels))

['Case_Based' 'Genetic_Algorithms' 'Neural_Networks'
 'Probabilistic_Methods' 'Reinforcement_Learning' 'Rule_Learning' 'Theory']


In [6]:
pd.value_counts(cora_content[1434])

Neural_Networks           818
Probabilistic_Methods     426
Genetic_Algorithms        418
Theory                    351
Case_Based                298
Reinforcement_Learning    217
Rule_Learning             180
Name: 1434, dtype: int64

In [7]:
# node label one hot encoding
labels_onehot = LabelEncoder().fit_transform(labels) # categorical label to numerical label
labels_onehot = np.expand_dims(labels_onehot, axis=1)
labels_onehot = OneHotEncoder().fit_transform(labels_onehot).toarray() # numerical label to onehot label

In [None]:
# # Details about LabelEnconder & OneHotEncoder
# print(labels)
# # 1) Generate label mapping
# le = LabelEncoder() 
# _labels_num = le.fit_transform(labels) # categorical label to numerical label
# print("-"*10, "Label Encoding Test", '-'*10)
# print("* le.classes_: \n", le.classes_)
# print("* labels[0:3]: \n", labels[0:3])
# print("* le.transform(): \n", le.transform(labels[0:3]))
# # 2) Reshape label
# # OneHotEncoder().fit_transform(labels) # Error Test
# _labels_num = np.expand_dims(_labels_num, axis=1)
# # 3) Transform label into 
# _labels_onehot = OneHotEncoder().fit_transform(_labels_num).toarray() # numerical label to onehot label
# print("* labels_onehot: \n", _labels_onehot[0:3])

In [8]:
inds = np.arange(ids.shape[0]) # use index at identifying each node
x = vecs
y = labels_onehot
print(ids.shape, x.shape, y.shape)

(2708,) (2708, 1433) (2708, 7)


In [9]:
num_classes = 7
num_per_train = 10
num_per_test = 100

# Transductive
x_train, x_test, y_train, y_test, idx_train, idx_test = train_test_split(x, y, inds, stratify=y,
                                                    train_size=num_classes*num_per_train,
                                                    test_size=num_classes*num_per_test,
                                                    random_state=42)

x_train, x_valid, y_train, y_valid, idx_train, idx_valid = train_test_split(x_train, y_train, idx_train,
                                                      stratify=y_train,
                                                      train_size=int(num_classes*num_per_train*0.8),
                                                      test_size=int(num_classes*num_per_train*0.2),
                                                      random_state=42)

print(idx_train.shape, x_train.shape, y_train.shape) # 10 examples per class
print(idx_valid.shape, x_valid.shape, y_valid.shape) # 10 examples per class
print(idx_test.shape, x_test.shape, y_test.shape) # 100 examples per class

# Quiz: How to check the effect of stratify option?

(56,) (56, 1433) (56, 7)
(14,) (14, 1433) (14, 7)
(700,) (700, 1433) (700, 7)


In [14]:
# # Example for stratify option
# # Test for stratify 
# x_train_o, x_test_o, y_train_o, y_test_o, idx_train_o, idx_test_o = train_test_split(x, y, inds,
#                                                     stratify=y,
#                                                     train_size=num_classes*num_per_train,
#                                                     test_size=num_classes*num_per_test,
#                                                     random_state=45)
# # Remove stratify option
# x_train_x, x_test_x, y_train_x, y_test_x, idx_train_x, idx_test_x = train_test_split(x, y, inds,
#                                                     train_size=num_classes*num_per_train,
#                                                     test_size=num_classes*num_per_test,
#                                                     random_state=45)
# def get_proportions(_arr):
#   _bincount = np.bincount(_arr)
#   _tot = _arr.shape[0]
#   return _bincount / _tot

# # Compare label balance
# print("* Original: \n", get_proportions(np.argmax(y, axis=1)))
# print("-------------- With stratify option ------------")
# print("* Train [o]: \n", get_proportions(np.argmax(y_train_o, axis=1)))
# print("* Test [o]: \n", get_proportions(np.argmax(y_test_o, axis=1)))
# print("-------------- Without stratify option ------------")
# print("* Train [x]: \n", get_proportions(np.argmax(y_train_x, axis=1)))
# print("* Test [x]: \n", get_proportions(np.argmax(y_test_x, axis=1)))

* Original: 
 [0.11004431 0.15435746 0.30206795 0.15731167 0.08013294 0.06646972
 0.12961595]
-------------- With stratify option ------------
* Train [o]: 
 [0.11428571 0.15714286 0.3        0.15714286 0.07142857 0.07142857
 0.12857143]
* Test [o]: 
 [0.11       0.15428571 0.30285714 0.15714286 0.08       0.06571429
 0.13      ]
-------------- Without stratify option ------------
* Train [x]: 
 [0.07142857 0.17142857 0.24285714 0.17142857 0.08571429 0.08571429
 0.17142857]
* Test [x]: 
 [0.11285714 0.14571429 0.30571429 0.15571429 0.08285714 0.07142857
 0.12571429]


## model

In [10]:
dnn = Sequential([
    ly.Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    ly.Dense(units=64, activation='relu', kernel_initializer='he_normal'),
    ly.Dense(units=num_classes, kernel_initializer='he_normal')    
])

In [13]:
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
dnn.compile(optimizer='adam', loss=loss_fn, metrics=['acc'])
dnn.fit(x = x_train, y = y_train, batch_size=32, epochs=100, verbose=2, 
        validation_data=(x_valid, y_valid))
dnn.summary()

Epoch 1/100
2/2 - 0s - loss: 0.1432 - acc: 1.0000 - val_loss: 1.4287 - val_acc: 0.2857 - 389ms/epoch - 195ms/step
Epoch 2/100
2/2 - 0s - loss: 0.1109 - acc: 1.0000 - val_loss: 1.4040 - val_acc: 0.2857 - 19ms/epoch - 9ms/step
Epoch 3/100
2/2 - 0s - loss: 0.0836 - acc: 1.0000 - val_loss: 1.3852 - val_acc: 0.2857 - 22ms/epoch - 11ms/step
Epoch 4/100
2/2 - 0s - loss: 0.0651 - acc: 1.0000 - val_loss: 1.3712 - val_acc: 0.4286 - 22ms/epoch - 11ms/step
Epoch 5/100
2/2 - 0s - loss: 0.0509 - acc: 1.0000 - val_loss: 1.3610 - val_acc: 0.4286 - 28ms/epoch - 14ms/step
Epoch 6/100
2/2 - 0s - loss: 0.0397 - acc: 1.0000 - val_loss: 1.3539 - val_acc: 0.4286 - 24ms/epoch - 12ms/step
Epoch 7/100
2/2 - 0s - loss: 0.0308 - acc: 1.0000 - val_loss: 1.3509 - val_acc: 0.4286 - 22ms/epoch - 11ms/step
Epoch 8/100
2/2 - 0s - loss: 0.0239 - acc: 1.0000 - val_loss: 1.3487 - val_acc: 0.4286 - 21ms/epoch - 10ms/step
Epoch 9/100
2/2 - 0s - loss: 0.0195 - acc: 1.0000 - val_loss: 1.3499 - val_acc: 0.5000 - 22ms/epoch - 1

In [14]:
train_loss, train_acc = dnn.evaluate(x_train, y_train, verbose=0)
valid_loss, valid_acc = dnn.evaluate(x_valid, y_valid, verbose=0)
test_loss, test_acc = dnn.evaluate(x_test, y_test, verbose=0)

print("Train accuracy: ", train_acc)
print("Valid accuracy: ", valid_acc)
print("Test accuracy: ", test_acc)

Train accuracy:  1.0
Valid accuracy:  0.5
Test accuracy:  0.46000000834465027


In [15]:
# make adj matrix from citation information
def get_adj_matrix(ids):
    #cora_cites = np.loadtxt('/content/drive/MyDrive/TA/NPEX4기/0804/cora/cora.cites', dtype=np.int32)
    cora_cites = np.loadtxt('./cora/cora.cites', dtype=np.int32)
    N = ids.shape[0]
    adj_matrix = np.zeros(shape=(N, N), dtype=np.int32)
    
    # iterate over line
    for i in range(cora_cites.shape[0]):
        node1, node2 = cora_cites[i]
        idx1 = np.where(ids==node1)[0]
        idx2 = np.where(ids==node2)[0]
        
        # treat as undirected graph
        adj_matrix[idx1, idx2] = 1
        adj_matrix[idx2, idx1] = 1
    return adj_matrix
    
# make DAD(normalization) matrix
def get_norm_matrix(adj_matrix):
    a_tilda = adj_matrix + np.eye(adj_matrix.shape[0]) # A_ = A+I
    d_tilda = np.diag(1 / np.sqrt(np.sum(a_tilda, axis=1))) # D_^(-1/2) # D = sigma_j A_ij
    return np.matmul(np.matmul(d_tilda, a_tilda), d_tilda)

In [16]:
class GCN(Model):
    def __init__(self, A, input_dim=1433, hid_dim=64, num_classes=7, num_nodes=2708):
        super(GCN, self).__init__()
        self.A = tf.cast(A, dtype='float32')
        self.hid_dim = hid_dim
        w_init = tf.initializers.he_normal()
        
        self.W1 = self.add_weight(name='W1', 
                                  shape=(input_dim, self.hid_dim),
                                 initializer=w_init,
                                 trainable=True)     
        self.W2 = self.add_weight(name='W2',
                          shape=(self.hid_dim, num_classes),
                         initializer=w_init,
                         trainable=True)
        self.var_list = self.weights
            
    def call(self, x):
        x = tf.cast(x, "float32")
        L1 = tf.matmul(tf.matmul(self.A, x), self.W1)
        L1 = tf.nn.tanh(L1)
        
        L2 = tf.matmul(tf.matmul(self.A, L1), self.W2)
        return L2

    def loss_fn(self,logits, labels, indices):
        _labels = tf.gather_nd(labels, indices)
        _logits = tf.gather_nd(logits, indices)
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=_labels, 
                                                       logits=_logits)
        return tf.reduce_mean(loss)
    
    def evaluate(self, x, labels, indices):
        logits = self.call(x)
        loss = self.loss_fn(logits, labels, indices)        
        _logits = tf.gather_nd(logits, indices)
        _labels = tf.gather_nd(labels, indices)
 
        pred = tf.argmax(_logits, axis=1)
        ans = tf.argmax(_labels, axis=1)
        correct = tf.equal(pred, ans)
        acc = tf.reduce_mean(tf.cast(correct, tf.float32))
        return loss, acc
    
    def train(self, x, labels, idx_train, idx_val, optimizer, max_epochs=20):
        for epoch in range(1, max_epochs+1):
            with tf.GradientTape() as tape:
                logits = self.call(x)
                train_loss = self.loss_fn(logits, labels, idx_train)
            
            grad_list = tape.gradient(train_loss, self.var_list)
            grads_and_vars = zip(grad_list, self.var_list)
            optimizer.apply_gradients(grads_and_vars)
            
            # Evaluation
            train_loss, train_acc = self.evaluate(x, labels, idx_train)
            valid_loss, valid_acc = self.evaluate(x, labels, idx_val)
            print(f"Epoch {epoch:3d}: {train_loss:.4f}, {train_acc*100:.2f}," 
                  ,f"{valid_loss:.4f}, {valid_acc*100:.2f}")
    

In [None]:
# # Example for gather_nd function
# print(y[0:6])
# # print(idx_train)
# print(tf.gather_nd(y,[[0], [2], [5]] ))
# # print(tf.gather_nd(y,[[0,2], [2,3], [5,3]] ))

In [None]:
# # Example of Gradient Tape (1)
# xx = tf.Variable(3.0)
# with tf.GradientTape() as tape:
#   yy = xx**3 + 2 *xx
# # dyy_dxx = 3 xx^2 + 2 --> 29
# dyy_dxx = tape.gradient(yy, xx)
# print(dyy_dxx)

In [None]:
# # Example of Gradient Tape (2)
# _x1 = tf.Variable(3.0)
# _x2 = tf.Variable(5.0)
# with tf.GradientTape(persistent=True) as tape2:
#   _loss = (10 - _x1 * _x2) ** 2 # (10 - 15) ^2 = 25/ (10+4)^2 = 196
# # print()
# # 0) optimizer 정의
# _opt = tf.keras.optimizers.SGD(learning_rate=0.01)
# print(_x1.numpy(), _x2.numpy())
# # 1) gradient 계산
# _grad_list = tape2.gradient(_loss, [_x1, _x2])
# print(_grad_list)
# # 2) gradient mapping
# # [1,2,3] [4,5,6] --> [1,4], [2,5], [3,6]
# _grads_and_vars = zip(_grad_list, [_x1, _x2])
# # 3) gradient descent (apply/ step)
# _opt.apply_gradients(_grads_and_vars)
# print(_x1.numpy(), _x2.numpy())

In [17]:
num_nodes, input_dim = x.shape[0], x.shape[1]

adj_matrix = get_adj_matrix(ids)
norm_matrix = get_norm_matrix(adj_matrix)

gcn = GCN(A = norm_matrix, input_dim=input_dim, hid_dim=64, num_classes=num_classes, num_nodes=num_nodes)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
_idx_train = np.expand_dims(idx_train, axis=1)
_idx_val = np.expand_dims(idx_valid, axis=1)

gcn.train(x=x, labels=y, idx_train=_idx_train, idx_val=_idx_val, optimizer=optimizer, max_epochs=20) 

Epoch   1: 1.3735, 83.93, 1.7167, 50.00
Epoch   2: 0.9199, 91.07, 1.5027, 71.43
Epoch   3: 0.5971, 100.00, 1.3303, 78.57
Epoch   4: 0.3781, 100.00, 1.1863, 78.57
Epoch   5: 0.2349, 100.00, 1.0679, 71.43
Epoch   6: 0.1450, 100.00, 0.9770, 71.43
Epoch   7: 0.0904, 100.00, 0.9131, 71.43
Epoch   8: 0.0575, 100.00, 0.8719, 71.43
Epoch   9: 0.0374, 100.00, 0.8476, 78.57
Epoch  10: 0.0249, 100.00, 0.8351, 78.57
Epoch  11: 0.0169, 100.00, 0.8308, 78.57
Epoch  12: 0.0118, 100.00, 0.8322, 78.57
Epoch  13: 0.0084, 100.00, 0.8375, 78.57
Epoch  14: 0.0061, 100.00, 0.8456, 78.57
Epoch  15: 0.0045, 100.00, 0.8554, 78.57
Epoch  16: 0.0035, 100.00, 0.8662, 78.57
Epoch  17: 0.0027, 100.00, 0.8776, 78.57
Epoch  18: 0.0022, 100.00, 0.8892, 78.57
Epoch  19: 0.0018, 100.00, 0.9008, 78.57
Epoch  20: 0.0015, 100.00, 0.9121, 78.57


In [18]:
test_loss, test_acc = gcn.evaluate(x, y, np.expand_dims(idx_test, axis=1))
print("Test accuracy: ", test_acc)

Test accuracy:  tf.Tensor(0.72571427, shape=(), dtype=float32)


In [20]:
gcn_loss, gcn_acc = gcn.evaluate(x, y, np.expand_dims(idx_test, axis=1))
dnn_loss, dnn_acc = dnn.evaluate(x_test, y_test, verbose=0)
print(f"[GCN] test loss: {gcn_loss:.4f}, test acc: {gcn_acc*100:.2f}")
print(f"[DNN] test loss: {dnn_loss:.4f}, test acc: {dnn_acc*100:.2f}")

[GCN] test loss: 0.9285, test acc: 72.57
[DNN] test loss: 1.8369, test acc: 46.00


In [None]:
# Qeustion: Why GCN outperforms DNN?