In [10]:
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
import time
import numpy as np
from scipy.sparse import coo_matrix

# Message function handed to `update_all` in GCNLayer.forward: DGL builtin
# configured with source-node field 'h' and message field 'm'.
gcn_msg = fn.copy_u(u='h', out='m')
# Reduce function handed to `update_all`: DGL builtin that sums the incoming
# 'm' messages and stores the result in node field 'h'.
gcn_reduce = fn.sum(msg='m', out='h')

In [2]:
class GCNLayer(nn.Module):
    """One graph-convolution step: message passing followed by a linear map."""

    def __init__(self, in_feats, out_feats):
        """Create the layer's single `nn.Linear(in_feats, out_feats)` projection."""
        super().__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, feature):
        """Aggregate `feature` over graph `g`, then apply the linear projection.

        Args:
            g: Graph object providing `local_scope()`, `ndata` and `update_all()`.
            feature: Per-node input features, stored temporarily as `ndata['h']`.

        Returns:
            Output of `self.linear` applied to the aggregated node features.
        """
        # Everything written to ndata inside the local scope (the temporary
        # 'h' field) is dropped again when the scope exits, so `g` is left
        # without the scratch field.
        with g.local_scope():
            g.ndata['h'] = feature
            g.update_all(gcn_msg, gcn_reduce)
            aggregated = g.ndata['h']
        return self.linear(aggregated)

In [8]:
class Net(nn.Module):
    """Two stacked `GCNLayer`s with a ReLU between them.

    Args:
        L1_in_feats: Input feature size of the first layer.
        L1_out_feats: Output feature size of the first layer.
        L2_in_feats: Input feature size of the second layer. The second layer
            consumes the first layer's output, so callers pass the same value
            as `L1_out_feats`.
        L2_out_feats: Output feature size of the second layer.
    """

    def __init__(self, L1_in_feats, L1_out_feats, L2_in_feats, L2_out_feats):
        super().__init__()
        self.layer1 = GCNLayer(L1_in_feats, L1_out_feats)
        self.layer2 = GCNLayer(L2_in_feats, L2_out_feats)

    def forward(self, g, features, verbose=False):
        """Run the network on graph `g`.

        Args:
            g: Graph passed through to both layers.
            features: Per-node input features.
            verbose: When truthy, return the ReLU-activated output of the
                first layer instead of the second layer's output.
                Defaults to False.

        Returns:
            The hidden activations if `verbose` is truthy, otherwise the
            second layer's output.
        """
        hidden = F.relu(self.layer1(g, features))
        if verbose:
            # Only the hidden activations were requested, so the second
            # layer is not evaluated at all.
            return hidden
        return self.layer2(g, hidden)

In [5]:
def evaluate(model, g, features, labels, mask):
    """Return the classification accuracy of `model` on the nodes selected by `mask`.

    The model is switched to eval mode and is not switched back, so callers
    that continue training must call `model.train()` themselves.

    Args:
        model: Callable module invoked as `model(g, features, False)`.
        g: Graph forwarded to the model.
        features: Node features forwarded to the model.
        labels: Ground-truth class index per node.
        mask: Index/mask selecting the nodes to score.

    Returns:
        Fraction (float) of selected nodes whose arg-max prediction matches the label.
    """
    model.eval()
    with th.no_grad():
        masked_logits = model(g, features, False)[mask]
        masked_labels = labels[mask]
        predicted = th.max(masked_logits, dim=1).indices
        num_correct = (predicted == masked_labels).sum().item()
    return float(num_correct) / len(masked_labels)

In [24]:
def load_dataset(dataset):
    """Unpack the first graph of `dataset` together with its node-level tensors.

    Args:
        dataset: Indexable dataset whose element 0 is a graph exposing `ndata`
            with the keys 'feat', 'label', 'train_mask' and 'test_mask'.

    Returns:
        Tuple `(graph, features, labels, train_mask, test_mask)`.
    """
    graph = dataset[0]
    node_data = graph.ndata
    return (
        graph,
        node_data['feat'],
        node_data['label'],
        node_data['train_mask'],
        node_data['test_mask'],
    )

In [14]:
def save_to_file(file_name, coo_array):
    """Write a COO sparse matrix to a plain-text triplet file.

    File layout:
        line 1:  "<n_rows> <n_cols>"
        line 2:  number of stored entries
        rest:    one "<row> <col> <value>" line per stored entry

    Args:
        file_name: Path of the text file to create or overwrite.
        coo_array: Object exposing `shape`, `row`, `col` and `data`, as a
            `scipy.sparse.coo_matrix` does.
    """
    num_entries = len(coo_array.data)
    # The context manager closes the handle even if a write fails part-way.
    with open(file_name, "w") as out:
        out.write(str(coo_array.shape[0]) + " " + str(coo_array.shape[1]) + "\n")
        out.write(str(num_entries) + "\n")
        for row, col, value in zip(coo_array.row, coo_array.col, coo_array.data):
            # Explicit str() renders each scalar exactly as the previous
            # print-based implementation did.
            out.write(" ".join((str(row), str(col), str(value))) + "\n")

In [13]:
####################    Cora Dataset     #####################
# Train the two-layer GCN on Cora for `num_epoch` epochs, printing loss,
# test accuracy and mean epoch time, then export the first layer's hidden
# activations as a sparse triplet file.

from dgl.data import CoraGraphDataset
net = Net(1433, 16, 16, 7)
print(net)
g, features, labels, train_mask, test_mask = load_dataset(dataset=CoraGraphDataset())
# Add edges between each node and itself to preserve old node representations
g.add_edges(g.nodes(), g.nodes())
optimizer = th.optim.Adam(net.parameters(), lr=1e-2)
dur = []
num_epoch = 50
for epoch in range(num_epoch):
    t0 = time.time()

    # evaluate() at the end of the previous iteration left the model in
    # eval mode, so training mode is re-enabled every epoch.
    net.train()
    logits = net(g, features, False)  # forward
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Only the training step is timed; evaluation below is excluded.
    dur.append(time.time() - t0)
    acc = evaluate(net, g, features, labels, test_mask)  # forward
    print("Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, np.mean(dur)))

# Export pass is inference only: run it without autograd so no graph is
# recorded for activations that are written straight to disk.
net.eval()
with th.no_grad():
    hidden_feature = net(g, features, True)
coo_hidden_feature = coo_matrix(hidden_feature.detach().numpy())
###  save to file ###
save_to_file("Cora_feat_L2.txt", coo_hidden_feature)

Net(
  (layer1): GCNLayer(
    (linear): Linear(in_features=1433, out_features=16, bias=True)
  )
  (layer2): GCNLayer(
    (linear): Linear(in_features=16, out_features=7, bias=True)
  )
)
  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Epoch 00000 | Loss 1.9631 | Test Acc 0.1900 | Time(s) nan


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 00001 | Loss 1.8063 | Test Acc 0.4400 | Time(s) nan
Epoch 00002 | Loss 1.6556 | Test Acc 0.5420 | Time(s) nan
Epoch 00003 | Loss 1.5267 | Test Acc 0.5830 | Time(s) 0.0240
Epoch 00004 | Loss 1.4080 | Test Acc 0.6150 | Time(s) 0.0241
Epoch 00005 | Loss 1.3046 | Test Acc 0.6410 | Time(s) 0.0244
Epoch 00006 | Loss 1.2102 | Test Acc 0.6730 | Time(s) 0.0243
Epoch 00007 | Loss 1.1190 | Test Acc 0.6980 | Time(s) 0.0244
Epoch 00008 | Loss 1.0296 | Test Acc 0.7240 | Time(s) 0.0244
Epoch 00009 | Loss 0.9434 | Test Acc 0.7200 | Time(s) 0.0243
Epoch 00010 | Loss 0.8620 | Test Acc 0.7200 | Time(s) 0.0243
Epoch 00011 | Loss 0.7873 | Test Acc 0.7170 | Time(s) 0.0243
Epoch 00012 | Loss 0.7201 | Test Acc 0.7100 | Time(s) 0.0243
Epoch 00013 | Loss 0.6595 | Test Acc 0.7140 | Time(s) 0.0243
Epoch 00014 | Loss 0.6032 | Test Acc 0.7190 | Time(s) 0.0243
Epoch 00015 | Loss 0.5502 | Test Acc 0.7260 | Time(s) 0.0243
Epoch 00016 | Loss 0.5010 | Test Acc 0.7270 | Time(s) 0.0243
Epoch 00017 | Loss 0.4564 | Te

In [15]:
####################    Citeseer Dataset     #####################
# Train the two-layer GCN on Citeseer for `num_epoch` epochs, printing loss,
# test accuracy and mean epoch time, then export the first layer's hidden
# activations as a sparse triplet file.

from dgl.data import CiteseerGraphDataset
net = Net(3703, 16, 16, 6)
print(net)
g, features, labels, train_mask, test_mask = load_dataset(dataset=CiteseerGraphDataset())
# Add edges between each node and itself to preserve old node representations
g.add_edges(g.nodes(), g.nodes())
optimizer = th.optim.Adam(net.parameters(), lr=1e-2)
dur = []
num_epoch = 50
for epoch in range(num_epoch):
    t0 = time.time()
    # evaluate() at the end of the previous iteration left the model in
    # eval mode, so training mode is re-enabled every epoch.
    net.train()
    logits = net(g, features, False)  # forward
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Only the training step is timed; evaluation below is excluded.
    dur.append(time.time() - t0)
    acc = evaluate(net, g, features, labels, test_mask)  # forward
    print("Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, np.mean(dur)))

# Export pass is inference only: run it without autograd so no graph is
# recorded for activations that are written straight to disk.
net.eval()
with th.no_grad():
    hidden_feature = net(g, features, True)
coo_hidden_feature = coo_matrix(hidden_feature.detach().numpy())
###  save to file ###
save_to_file("Citeseer_feat_L2.txt", coo_hidden_feature)

Net(
  (layer1): GCNLayer(
    (linear): Linear(in_features=3703, out_features=16, bias=True)
  )
  (layer2): GCNLayer(
    (linear): Linear(in_features=16, out_features=6, bias=True)
  )
)
Downloading /Users/pouya/.dgl/citeseer.zip from https://data.dgl.ai/dataset/citeseer.zip...
Extracting file to /Users/pouya/.dgl/citeseer


  r_inv = np.power(rowsum, -1).flatten()


Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done saving data into cached files.
Epoch 00000 | Loss 1.8091 | Test Acc 0.2500 | Time(s) 0.0635
Epoch 00001 | Loss 1.7154 | Test Acc 0.2770 | Time(s) 0.0633
Epoch 00002 | Loss 1.6431 | Test Acc 0.3030 | Time(s) 0.0631
Epoch 00003 | Loss 1.5790 | Test Acc 0.3570 | Time(s) 0.0631
Epoch 00004 | Loss 1.5127 | Test Acc 0.4080 | Time(s) 0.0631
Epoch 00005 | Loss 1.4440 | Test Acc 0.4440 | Time(s) 0.0632
Epoch 00006 | Loss 1.3814 | Test Acc 0.4830 | Time(s) 0.0632
Epoch 00007 | Loss 1.3262 | Test Acc 0.5280 | Time(s) 0.0633
Epoch 00008 | Loss 1.2743 | Test Acc 0.5310 | Time(s) 0.0633
Epoch 00009 | Loss 1.2221 | Test Acc 0.5330 | Time(s) 0.0634
Epoch 00010 | Loss 1.1677 | Test Acc 0.5460 | Time(s) 0.0634
Epoch 00011 | Loss 1.1115 | Test Acc 0.5450 | Time(s) 0.0634
Epoch 00012 | Loss 1.0544 | Test Acc 0.5550 | T

In [41]:
####################    Pubmed Dataset     #####################
# Train the two-layer GCN on Pubmed for `num_epoch` epochs, printing loss,
# test accuracy and mean epoch time, then export the first layer's hidden
# activations as a sparse triplet file.

from dgl.data import PubmedGraphDataset
net = Net(500, 16, 16, 3)
print(net)
g, features, labels, train_mask, test_mask = load_dataset(dataset=PubmedGraphDataset())
# Add edges between each node and itself to preserve old node representations
g.add_edges(g.nodes(), g.nodes())
optimizer = th.optim.Adam(net.parameters(), lr=1e-2)
dur = []
num_epoch = 50
for epoch in range(num_epoch):
    t0 = time.time()
    # evaluate() at the end of the previous iteration left the model in
    # eval mode, so training mode is re-enabled every epoch.
    net.train()
    logits = net(g, features, False)  # forward
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Only the training step is timed; evaluation below is excluded.
    dur.append(time.time() - t0)
    acc = evaluate(net, g, features, labels, test_mask)  # forward
    print("Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, np.mean(dur)))

# Export pass is inference only: run it without autograd so no graph is
# recorded for activations that are written straight to disk.
net.eval()
with th.no_grad():
    hidden_feature = net(g, features, True)
coo_hidden_feature = coo_matrix(hidden_feature.detach().numpy())
###  save to file ###
save_to_file("Pubmed_feat_L2.txt", coo_hidden_feature)

Net(
  (layer1): GCNLayer(
    (linear): Linear(in_features=500, out_features=16, bias=True)
  )
  (layer2): GCNLayer(
    (linear): Linear(in_features=16, out_features=3, bias=True)
  )
)
  NumNodes: 19717
  NumEdges: 88651
  NumFeats: 500
  NumClasses: 3
  NumTrainingSamples: 60
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Epoch 00000 | Loss 1.1051 | Test Acc 0.5700 | Time(s) 0.0556
Epoch 00001 | Loss 0.9994 | Test Acc 0.6160 | Time(s) 0.0539
Epoch 00002 | Loss 0.8937 | Test Acc 0.6400 | Time(s) 0.0540
Epoch 00003 | Loss 0.8126 | Test Acc 0.6660 | Time(s) 0.0535
Epoch 00004 | Loss 0.7219 | Test Acc 0.6770 | Time(s) 0.0535
Epoch 00005 | Loss 0.6516 | Test Acc 0.6890 | Time(s) 0.0532
Epoch 00006 | Loss 0.5781 | Test Acc 0.6840 | Time(s) 0.0531
Epoch 00007 | Loss 0.5191 | Test Acc 0.6900 | Time(s) 0.0530
Epoch 00008 | Loss 0.4652 | Test Acc 0.7070 | Time(s) 0.0530
Epoch 00009 | Loss 0.4168 | Test Acc 0.7200 | Time(s) 0.0529
Epoch 00010 | Loss 0

In [50]:
####################    Reddit Dataset     #####################
# Train the two-layer GCN on Reddit for `num_epoch` epochs, printing loss,
# test accuracy and mean epoch time, then export the first layer's hidden
# activations as a sparse triplet file.
#
# NOTE(review): the recorded run of this cell reports losses in the tens of
# thousands during the first epochs. The unnormalized sum aggregation used
# by GCNLayer may be the cause -- confirm before relying on the exported
# features.

from dgl.data import RedditDataset
net = Net(602, 16, 16, 41)
print(net)
g, features, labels, train_mask, test_mask = load_dataset(dataset=RedditDataset())
# Add edges between each node and itself to preserve old node representations
g.add_edges(g.nodes(), g.nodes())
optimizer = th.optim.Adam(net.parameters(), lr=1e-2)
dur = []
num_epoch = 50
for epoch in range(num_epoch):
    t0 = time.time()
    # evaluate() at the end of the previous iteration left the model in
    # eval mode, so training mode is re-enabled every epoch.
    net.train()
    logits = net(g, features, False)  # forward
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Only the training step is timed; evaluation below is excluded.
    dur.append(time.time() - t0)
    acc = evaluate(net, g, features, labels, test_mask)  # forward
    print("Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, np.mean(dur)))

# Export pass is inference only: run it without autograd so no graph is
# recorded for activations that are written straight to disk.
net.eval()
with th.no_grad():
    hidden_feature = net(g, features, True)
coo_hidden_feature = coo_matrix(hidden_feature.detach().numpy())
###  save to file ###
save_to_file("Reddit_feat_L2.txt", coo_hidden_feature)

Net(
  (layer1): GCNLayer(
    (linear): Linear(in_features=602, out_features=16, bias=True)
  )
  (layer2): GCNLayer(
    (linear): Linear(in_features=16, out_features=41, bias=True)
  )
)
Epoch 00000 | Loss 62792.0469 | Test Acc 0.1574 | Time(s) 43.9891
Epoch 00001 | Loss 168816.2500 | Test Acc 0.0286 | Time(s) 42.3227
Epoch 00002 | Loss 171078.6250 | Test Acc 0.0330 | Time(s) 41.8067
Epoch 00003 | Loss 129313.9844 | Test Acc 0.0734 | Time(s) 41.6736
Epoch 00004 | Loss 60475.3789 | Test Acc 0.1772 | Time(s) 41.5081
Epoch 00005 | Loss 25114.3281 | Test Acc 0.2316 | Time(s) 41.2634
Epoch 00006 | Loss 5377.3677 | Test Acc 0.3165 | Time(s) 41.1445
Epoch 00007 | Loss 3517.0352 | Test Acc 0.3524 | Time(s) 41.0212
Epoch 00008 | Loss 2742.8533 | Test Acc 0.3680 | Time(s) 40.9631
Epoch 00009 | Loss 2166.3064 | Test Acc 0.3596 | Time(s) 40.9303
Epoch 00010 | Loss 1941.8131 | Test Acc 0.3687 | Time(s) 40.8515
Epoch 00011 | Loss 1572.6862 | Test Acc 0.3691 | Time(s) 40.7757
Epoch 00012 | Loss 15

In [34]:
import numpy as np 
from scipy.sparse import coo_matrix
# Toy example of the COO attributes that save_to_file() writes out:
# `data` holds the non-zero values, `row` / `col` their coordinates.
a = np.array([
    [2.6, 0.0, 9.9, 0.0],
    [0.0, 0.0, 7.9, 7.9],
])
b = coo_matrix(a)
print(b.data, b.row, b.col, sep="\n")

[2.6 9.9 7.9 7.9]
[0 0 1 1]
[0 2 2 3]


In [51]:
# !pip install rdflib
# import rdflib