In [1]:
from utils import load_data

from matplotlib import pyplot as plt
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the graph, node features, label matrices and split masks for the chosen dataset.
dataset = 'cora'

(adj, features,
 y_train, y_val, y_test,
 train_mask, val_mask, test_mask) = load_data(dataset)

# Basic dimensions reused by the later cells.
num_nodes, num_features, num_classes = adj.shape[0], features.shape[1], y_train.shape[1]

# Summary of the split sizes: counts the entries of each label matrix equal to True.
print("Total number of nodes: {}".format(adj.shape[0]))
print("Training nodes: {}".format(np.count_nonzero(y_train == True)))
print("Validation nodes: {}".format(np.count_nonzero(y_val == True)))
print("Test nodes: {}".format(np.count_nonzero(y_test == True)))
print("Num nodes: {}".format(num_nodes))
print("num_features: {}".format(num_features))

# Replace the loaded adjacency matrix with a dense NumPy array.
adj = np.array(adj.todense())

Total number of nodes: 2708
Training nodes: 140
Validation nodes: 500
Test nodes: 1000
Num nodes: 2708
num_features: 1433


In [3]:
def neighbouring_features(node):
    """Return the mean feature row of the neighbours of ``node``.

    Neighbours are the column indices whose entry in row ``node`` of the
    global ``adj`` equals 1; their rows are read from the global ``features``.

    Parameters
    ----------
    node : int
        Row index into ``adj``.

    Returns
    -------
    The first row of ``np.mean(neighbour_rows, axis=0)``. In the recorded run
    each result has shape ``(1, num_features)``. A node without any neighbour
    yields a zero array of shape ``(1, features.shape[1])``.
    """
    neighbour_indices = np.flatnonzero(adj[node] == 1)
    if neighbour_indices.size == 0:
        # Averaging over an empty selection is undefined (NaN or an error,
        # depending on the feature container), so fall back to a zero row.
        return np.zeros((1, features.shape[1]))
    return np.mean(features[neighbour_indices], axis=0)[0]


def compute_neighbouring_features():
    """Stack the neighbour-mean features of every node, in node-index order.

    Calls ``neighbouring_features`` once per index in ``range(num_nodes)`` and
    returns the collected results as a single NumPy array.
    """
    return np.array([neighbouring_features(node) for node in range(num_nodes)])

In [4]:
%%time
# Build the neighbour-mean features for every node (one Python-level call per
# node, hence the timing) and show the shape of the resulting array.
f = compute_neighbouring_features()
print(f.shape)

(2708, 1, 1433)
CPU times: user 17.4 s, sys: 77.7 ms, total: 17.5 s
Wall time: 17.5 s


In [5]:
# Collapse the neighbour-mean array to one row of `num_features` values per node.
f = np.reshape(f, (-1, num_features))

# Dense copy of the nodes' own features.
dense_features = np.array(features.todense())

# Column layout: neighbour means first, then the node's own features.
merged_features = np.hstack((f, dense_features))

## SVM using node features alone

In [104]:
def normalize_features(features):
    """Row-normalize a dense feature matrix.

    Every row is divided by its sum, so rows with a non-zero sum add up to 1.
    Rows whose sum is zero (or whose reciprocal is infinite) are scaled by 0
    and therefore stay all-zero.

    Parameters
    ----------
    features : array-like of shape (n_rows, n_cols)
        Dense 2-D data; integer and boolean dtypes are accepted.

    Returns
    -------
    A new array of the same shape holding the scaled rows. The input is not
    modified.
    """
    row_sum = np.asarray(np.sum(features, axis=1)).flatten()
    if not np.issubdtype(row_sum.dtype, np.inexact):
        # np.power rejects negative integer exponents on integer arrays.
        row_sum = row_sum.astype(float)

    # Invert only the non-zero sums so no divide-by-zero warning is raised.
    r_inv = np.zeros_like(row_sum)
    nonzero = row_sum != 0
    r_inv[nonzero] = np.power(row_sum[nonzero], -1)
    r_inv[np.isinf(r_inv)] = 0

    # Broadcasting scales row i by r_inv[i] without materialising an
    # (n_rows x n_rows) diagonal matrix.
    return np.multiply(r_inv[:, np.newaxis], features)

In [114]:
# Optional row normalisation of the node features (currently disabled):
# dense_features = normalize_features(dense_features)

# Feature rows of each split, selected with the corresponding mask.
X_train_1, X_val_1, X_test_1 = (
    dense_features[mask] for mask in (train_mask, val_mask, test_mask)
)

# Class indices: position of the largest entry in each label row.
y_train_1 = y_train[train_mask].argmax(axis=-1)
y_val_1 = y_val[val_mask].argmax(axis=-1)
y_test_1 = y_test[test_mask].argmax(axis=-1)

print(X_test_1.shape)

(1000, 1433)


In [115]:
# Exhaustive search over (C, γ) for an SVM on the node features alone; the
# pair with the highest validation accuracy is kept.
kernel = 'rbf'

# NOTE(review): C runs from 2^-5 *down* to 2^-15 in half-power steps, and the
# recorded run selects the largest value in the grid (0.03125 = 2^-5).
# Confirm whether the upper exponent was meant to be +15 (integer steps,
# like the γ grid) -- TODO confirm before changing, results would differ.
C_grid = np.logspace(-5, -15, num=21, base=2)
γ_grid = np.logspace(-15, 3, num=19, base=2)
# γ_grid = ['auto']

# Reset the search state explicitly: if no candidate beats 0, the result is
# None rather than a best_C / best_γ left over from an earlier cell run.
best_val_acc = 0
best_C = None
best_γ = None
val_accs = []

for C in C_grid:
    for γ in γ_grid:
        clf = SVC(kernel=kernel, C=C, gamma=γ)
        clf.fit(X_train_1, y_train_1)
        val_predictions = clf.predict(X_val_1)
        # Fraction of validation nodes classified correctly.
        val_acc = np.count_nonzero(val_predictions == y_val_1) / len(val_predictions)
        val_accs.append(val_acc)
        if val_acc > best_val_acc:
            best_val_acc, best_C, best_γ = val_acc, C, γ

print("Kernel:", kernel)
print("Best C:", best_C)
print("Best γ:", best_γ)
print("Val acc:", best_val_acc)

Kernel: rbf
Best C: 0.03125
Best γ: 8.0
Val acc: 0.412


In [116]:
# %matplotlib
# plt.plot(C_grid, val_accs)

In [117]:
# Refit with the selected hyper-parameters and score on the test split.
clf = SVC(kernel=kernel, C=best_C, gamma=best_γ).fit(X_train_1, y_train_1)

predictions = clf.predict(X_test_1)
num_correct = np.count_nonzero(predictions == y_test_1)
test_accuracy = num_correct / len(predictions)
print(test_accuracy)

0.453


### Hyper-parameters and results
- RBF
    - C = 0.03125
    - $\gamma$ = 0.0078125
    - val acc = 0.564
    - test acc = 0.578
- poly
    - C = 0.011048543456039806
    - $\gamma$ = 0.25
    - val acc = 0.294
    - test acc = 0.306
- linear
    - C = 0.02209708691207961
    - val acc = 0.556
    - test acc = 0.588
- sigmoid
    - C = 0.03125
    - $\gamma$ = 6.103515625e-05
    - val acc = 0.554
    - test acc = 0.582

## SVM using node features and neighbours

In [29]:
# Same splits as before, but on the merged (neighbour mean + own) features.
X_train_2, X_val_2, X_test_2 = (
    merged_features[mask] for mask in (train_mask, val_mask, test_mask)
)

# Class indices: position of the largest entry in each label row.
y_train_2 = y_train[train_mask].argmax(axis=-1)
y_val_2 = y_val[val_mask].argmax(axis=-1)
y_test_2 = y_test[test_mask].argmax(axis=-1)

print(X_test_2.shape)

(1000, 2866)


In [21]:
# parameters = {'kernel':('linear', 'rbf'), 'C': np.arange(1, 100, 1)}
# svc = SVC()
# clf = GridSearchCV(svc, parameters)
# clf.fit(X_train, y_train)
# print(clf.best_params_)

In [97]:
# Exhaustive search over (C, γ) for an SVM on the merged features; the pair
# with the highest validation accuracy is kept.
kernel = 'linear'

# NOTE(review): C runs from 2^-5 *down* to 2^-15 in half-power steps, and the
# recorded run selects the largest value in the grid (0.03125 = 2^-5).
# Confirm whether the upper exponent was meant to be +15 -- TODO confirm
# before changing, results would differ.
C_grid = np.logspace(-5, -15, num=21, base=2)
# γ_grid = np.logspace(-15, 3, num=19, base=2)
# Single placeholder value: per the scikit-learn docs, gamma only applies to
# the 'rbf', 'poly' and 'sigmoid' kernels.
γ_grid = ['auto']

# Reset the search state explicitly: if no candidate beats 0, the result is
# None rather than a best_C / best_γ left over from an earlier cell run.
best_val_acc = 0
best_C = None
best_γ = None
val_accs = []

for C in C_grid:
    for γ in γ_grid:
        clf = SVC(kernel=kernel, C=C, gamma=γ)
        clf.fit(X_train_2, y_train_2)
        val_predictions = clf.predict(X_val_2)
        # Fraction of validation nodes classified correctly.
        val_acc = np.count_nonzero(val_predictions == y_val_2) / len(val_predictions)
        val_accs.append(val_acc)
        if val_acc > best_val_acc:
            best_val_acc, best_C, best_γ = val_acc, C, γ

print("Kernel:", kernel)
print("Best C:", best_C)
print("Best γ:", best_γ)
print("Val acc:", best_val_acc)

Kernel: linear
Best C: 0.03125
Best γ: auto
Val acc: 0.7


In [98]:
# %matplotlib
# plt.plot(C_grid, val_accs)

In [99]:
# Refit with the selected hyper-parameters and score on the test split.
clf = SVC(kernel=kernel, C=best_C, gamma=best_γ).fit(X_train_2, y_train_2)

predictions = clf.predict(X_test_2)
num_correct = np.count_nonzero(predictions == y_test_2)
test_accuracy = num_correct / len(predictions)
print(test_accuracy)

0.731


### Hyper-parameters and results
- RBF
    - C = 0.03125
    - $\gamma$ = 0.0009765625
    - val acc = 0.654
    - test acc = 0.665
- poly
    - C = 0.02209708691207961
    - $\gamma$ = 0.125
    - val acc = 0.26
    - test acc = 0.251
- linear
    - C = 0.03125
    - val acc = 0.7
    - test acc = 0.731
- sigmoid
    - C = 0.03125
    - $\gamma$ = 0.125
    - val acc = 0.654
    - test acc = 0.684

Note: it is possible to reach an accuracy of about 0.76 (76%) using a linear kernel with C ≈ 5.

## Use simple feedforward neural net

In [6]:
from keras.models import Model
from keras.layers import Dense, Input, Dropout, BatchNormalization
from keras.regularizers import l2

Using TensorFlow backend.


In [7]:
def accuracy(predictions_probs, labels):
    """Fraction of samples whose arg-max prediction equals the arg-max label.

    Parameters
    ----------
    predictions_probs : array
        Per-class scores; the predicted class is the arg-max over the last axis.
    labels : array
        Label rows; the true class is the arg-max over the last axis.

    Returns
    -------
    float
        Number of matching positions divided by ``len(labels)``.
    """
    predicted_classes = np.argmax(predictions_probs, axis=-1)
    true_classes = np.argmax(labels, axis=-1)
    assert predicted_classes.shape == true_classes.shape
    matches = predicted_classes == true_classes
    return np.count_nonzero(matches) / len(true_classes)

In [8]:
# Splits for the neural net: merged features with the label rows kept as-is
# (no arg-max), selected with the split masks.
X_train_3, y_train_3 = merged_features[train_mask], y_train[train_mask]
X_val_3, y_val_3 = merged_features[val_mask], y_val[val_mask]
X_test_3, y_test_3 = merged_features[test_mask], y_test[test_mask]

print(X_val_3.shape)
print(y_val_3.shape)

(500, 2866)
(500, 7)


In [18]:
# Build, train and evaluate a feed-forward classifier with one hidden layer
# on the merged features.
# NOTE(review): no random seed is set in the visible cells, so the recorded
# accuracies are presumably not exactly reproducible -- confirm.
num_dense_features = X_train_3.shape[1]
print(num_dense_features)

# Training hyper-parameters.
batch_size = 32
num_epochs = 300
p = 0.2            # dropout rate, used on the inputs and on the hidden layer
reg_weight = 0.05  # L2 penalty weight on the hidden layer's kernel

h_1 = 64  # width of the hidden layer
# h_2 = 32

# Architecture: dropout -> Dense(h_1, relu) -> dropout -> Dense(num_classes, softmax).
# The commented-out lines are disabled variants (batch normalisation, second hidden layer).
inputs = Input(shape=(num_dense_features, ))
inputs_dropped = Dropout(p)(inputs)
x1 = Dense(h_1, activation='relu', kernel_regularizer=l2(reg_weight))(inputs_dropped)
# x1 = BatchNormalization()(x1)
x1_dropped = Dropout(p)(x1)
# x2 = Dense(h_2, activation='relu', kernel_regularizer=l2(reg_weight))(x1_dropped)
# x2_dropped = Dropout(p)(x2)
# NOTE(review): `predictions` is also the name the SVM cells bind to their
# test-set predictions; here it is rebound to the model's output tensor.
predictions = Dense(num_classes, activation='softmax')(x1_dropped)

model = Model(inputs=inputs, outputs=predictions)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# Train silently (verbose=0); the validation split is passed to fit() as validation_data.
hist = model.fit(x=X_train_3, y=y_train_3, batch_size=batch_size, epochs=num_epochs, validation_data=(X_val_3, y_val_3), verbose=0)
# Training accuracy as recorded for the last epoch.
print("train acc:", hist.history['categorical_accuracy'][-1])

# Score the trained model on the validation and test splits with the `accuracy` helper.
val_predictions_probabilities =  model.predict(X_val_3, batch_size=batch_size)
print("Val acc:", accuracy(val_predictions_probabilities, y_val_3))

test_predictions_probabilities = model.predict(X_test_3, batch_size=batch_size)
print("Test acc:", accuracy(test_predictions_probabilities, y_test_3))

2866
train acc: 1.0
Val acc: 0.718
Test acc: 0.735


In [19]:
# Re-score the trained model on the test split (same computation as the last
# step of the training cell).
test_predictions_probabilities = model.predict(X_test_3, batch_size=batch_size)
print(accuracy(test_predictions_probabilities, y_test_3))

0.735


In [11]:
# Score the trained model on its own training split, using the same `accuracy`
# helper as for the validation and test splits.
train_predictions_probabilities =  model.predict(X_train_3, batch_size=batch_size)
print(accuracy(train_predictions_probabilities, y_train_3))

1.0
