In [1]:
from utils import load_data

from matplotlib import pyplot as plt
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [17]:
dataset = 'cora'

adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(dataset)
num_nodes = adj.shape[0]
num_features = features.shape[1]
num_classes = y_train.shape[1]

print("Total number of nodes: {}".format(adj.shape[0]))
print("Training nodes: {}".format(len(np.argwhere(y_train == True))))
print("Validation nodes: {}".format(len(np.argwhere(y_val == True))))
print("Test nodes: {}".format(len(np.argwhere(y_test == True))))
print("Num nodes: {}".format(num_nodes))
print("num_features: {}".format(num_features))

adj = np.array(adj.todense())

Total number of nodes: 2708
Training nodes: 140
Validation nodes: 500
Test nodes: 1000
Num nodes: 2708
num_features: 1433


In [3]:
# Compute the mean of all neighbouring features for the given node
def neighbouring_features(node):
    neighbours = adj[node]
    neighbour_indices = np.argwhere(neighbours == 1)[:, 0]
    neighbour_features = features[neighbour_indices]
    return np.mean(neighbour_features, axis=0)[0]


def compute_neighbouring_features():
    f = []
    for i in range(num_nodes):
        f.append(neighbouring_features(i))
    # f = [neighbouring_features(i) for i in range(num_nodes)]
    return np.array(f)

In [4]:
%%time
f = compute_neighbouring_features()
print(f.shape)

(2708, 1, 1433)
CPU times: user 16.7 s, sys: 34.8 ms, total: 16.7 s
Wall time: 16.7 s


In [5]:
f = f.reshape(-1, num_features)
dense_features = np.array(features.todense())

merged_features = np.concatenate([f, dense_features], axis=-1)

## SVM using node features alone

In [28]:
X_train_1 = dense_features[train_mask]
y_train_1 = np.argmax(y_train[train_mask], axis=-1)

X_val_1 = dense_features[val_mask]
y_val_1 = np.argmax(y_val[val_mask], axis=-1)

X_test_1 = dense_features[test_mask]
y_test_1 = np.argmax(y_test[test_mask], axis=-1)
print(X_test_1.shape)

(1000, 1433)


In [None]:
kernel = 'rbf'

best_val_acc = 0
val_accs= []

C_grid = np.logspace(-5, -15, num=21, base=2)
γ_grid = np.logspace(-15, 3, num=19, base=2)
# γ_grid = ['auto']

for C in C_grid:
    for γ in γ_grid:
        clf = SVC(kernel=kernel, C=C, gamma=γ)
        clf.fit(X_train_1, y_train_1)
        val_predictions = clf.predict(X_val_1)
        val_acc = len(np.argwhere(val_predictions == y_val_1)) / len(val_predictions)
        val_accs.append(val_acc)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_C = C
            best_γ = γ

print("Kernel:", kernel)
print("Best C:", best_C)
print("Best γ:", best_γ)
print("Val acc:", best_val_acc)

In [None]:
# %matplotlib
# plt.plot(C_grid, val_accs)

In [None]:
clf = SVC(kernel=kernel, C=best_C, gamma=best_γ)
clf.fit(X_train_1, y_train_1)
    
predictions = clf.predict(X_test_1)
test_accuracy = len(np.argwhere(predictions == y_test_1)) / len(predictions)
print(test_accuracy)

### Hyper-parameters and results
- RBF
    - C = 0.03125
    - $\gamma$ = 0.0078125
    - val acc = 
    - test acc = 0.578
- poly
    - C = 0.011048543456039806
    - $\gamma$ = 0.25
    - val acc = 
    - test acc = 0.306
- linear
    - C = 0.02209708691207961
    - val acc = 
    - test acc = 0.588
- sigmoid
    - C = 0.03125
    - $\gamma$ = 6.103515625e-05
    - val acc = 0.554
    - test acc = 0.582

## SVM using node features and neighbours

In [29]:
X_train_2 = merged_features[train_mask]
y_train_2 = np.argmax(y_train[train_mask], axis=-1)

X_val_2 = merged_features[val_mask]
y_val_2 = np.argmax(y_val[val_mask], axis=-1)

X_test_2 = merged_features[test_mask]
y_test_2 = np.argmax(y_test[test_mask], axis=-1)
print(X_test_2.shape)

(1000, 2866)


In [21]:
# parameters = {'kernel':('linear', 'rbf'), 'C': np.arange(1, 100, 1)}
# svc = SVC()
# clf = GridSearchCV(svc, parameters)
# clf.fit(X_train, y_train)
# print(clf.best_params_)

In [26]:
%%time
kernel = 'linear'
best_val_acc = 0
val_accs= []
C_grid = np.logspace(1, 4, 40)
for C in C_grid:
    clf = SVC(kernel=kernel, C=C)
    clf.fit(X_train_2, y_train_2)
    val_predictions = clf.predict(X_val_2)
    val_acc = len(np.argwhere(val_predictions == y_val_2)) / len(val_predictions)
    val_accs.append(val_acc)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_C = C

print(kernel)
print(best_val_acc)
print(best_C)

[   10.            11.93776642    14.2510267     17.0125428
    20.30917621    24.24462017    28.94266125    34.55107295
    41.24626383    49.23882632    58.78016072    70.17038287
    83.76776401   100.           119.37766417   142.51026703
   170.12542799   203.09176209   242.44620171   289.42661247
   345.51072946   412.46263829   492.38826317   587.80160723
   701.70382867   837.67764007  1000.          1193.77664171
  1425.1026703   1701.25427985  2030.9176209   2424.46201708
  2894.26612472  3455.10729459  4124.6263829   4923.88263171
  5878.01607227  7017.0382867   8376.77640068 10000.        ]
linear
0.692
10.0
CPU times: user 11.8 s, sys: 63.6 ms, total: 11.9 s
Wall time: 11.9 s


In [23]:
%matplotlib
plt.plot(C_grid, val_accs)

Using matplotlib backend: TkAgg


[<matplotlib.lines.Line2D at 0x11927aba8>]

In [25]:
clf = SVC(kernel='linear', C=best_C)
clf.fit(X_train_2, y_train_2)
    
predictions = clf.predict(X_test_2)
test_accuracy = len(np.argwhere(predictions == y_test_2)) / len(predictions)
print(test_accuracy)

0.729


Note: possible to get 76 using a linear kernel and C=5 (approximatively)