In [1]:
import dgl
import dgl.nn as dglnn
import torch.nn as nn
import torch.nn.functional as F
import torch


# Define a Heterograph Conv model

class RGCN(nn.Module):
    """Two-layer relational graph convolution for a heterogeneous graph.

    Each node type owns a learnable embedding table that acts as its input
    feature matrix. Two ``HeteroGraphConv`` layers follow, each holding one
    ``GraphConv`` per relation and summing the per-relation results.

    Args:
        emb_types: Mapping from node type name to the number of nodes of
            that type (first dimension of that type's embedding table).
        emb_size: Width of every learnable node embedding.
        hid_feats: Output width of the first convolution layer.
        out_feats: Output width of the second convolution layer.
        rel_names: Iterable of relation names; iterated once per layer to
            create one ``GraphConv`` per relation.
    """

    def __init__(self, emb_types, emb_size, hid_feats, out_feats, rel_names):
        super().__init__()

        # Learnable input embeddings, one table per node type, Xavier-initialised.
        # Background reading: https://www.jianshu.com/p/767950b560c4
        tables = {}
        for ntype, num_nodes in emb_types.items():
            table = nn.Parameter(torch.Tensor(num_nodes, emb_size))
            nn.init.xavier_uniform_(table)
            tables[ntype] = table
        self.embed = nn.ParameterDict(tables)

        def relation_layer(in_width, out_width):
            # One GraphConv per relation; per-relation outputs are summed.
            per_relation = {rel: dglnn.GraphConv(in_width, out_width)
                            for rel in rel_names}
            return dglnn.HeteroGraphConv(per_relation, aggregate='sum')

        self.conv1 = relation_layer(emb_size, hid_feats)
        self.conv2 = relation_layer(hid_feats, out_feats)

    def forward(self, graph):
        """Apply both convolution layers to ``graph``.

        The stored embeddings are used as the node input features, so the
        graph is the only argument. Returns the output of the second layer,
        which is iterated as a mapping keyed by node type.
        """
        hidden = self.conv1(graph, self.embed)
        activated = {}
        for ntype, feats in hidden.items():
            activated[ntype] = F.relu(feats)
        return self.conv2(graph, activated)

In [2]:
# Load the saved heterograph. Element 0 of the load_graphs result is indexed
# again with 0 to take the first stored graph.
GRAPH_PATH = "./graphs/industrial_and_scientific_5_core.dgl"
loaded_graphs = dgl.load_graphs(GRAPH_PATH)[0]
g = loaded_graphs[0]
g

Graph(num_nodes={'Brand': 1900, 'Customer': 11041, 'Product': 5334, 'Review': 77071},
      num_edges={('Brand', 'rev_SOLD_BY', 'Product'): 5555, ('Customer', 'WROTE', 'Review'): 77071, ('Product', 'SOLD_BY', 'Brand'): 5555, ('Product', 'rev_REVIEW_OF', 'Review'): 77071, ('Review', 'REVIEW_OF', 'Product'): 77071, ('Review', 'rev_WROTE', 'Customer'): 77071},
      metagraph=[('Brand', 'Product', 'rev_SOLD_BY'), ('Product', 'Brand', 'SOLD_BY'), ('Product', 'Review', 'rev_REVIEW_OF'), ('Customer', 'Review', 'WROTE'), ('Review', 'Product', 'REVIEW_OF'), ('Review', 'Customer', 'rev_WROTE')])

In [3]:
# Model hyperparameters: embedding width, hidden width, number of output classes.
EMB_SIZE = 512
HID_FEATS = 256
NUM_CLASSES = 2

# One embedding row per node, for every node type in the graph.
nodes_per_type = {ntype: g.number_of_nodes(ntype) for ntype in g.ntypes}
model = RGCN(nodes_per_type, EMB_SIZE, HID_FEATS, NUM_CLASSES, g.etypes)

# Targets and split masks are stored as node data on the "Review" nodes.
review_data = g.nodes["Review"].data
labels = review_data["Positive"]
train_mask = review_data["train_mask"]
test_mask = review_data["test_mask"]

In [4]:
NUM_EPOCHS = 100

opt = torch.optim.Adam(model.parameters())

# The boolean training mask and the integer class targets are identical on
# every epoch, so build them once instead of re-deriving them per iteration.
train_idx = train_mask == 1
train_targets = labels[train_idx].type(torch.long)

# Nothing inside the loop switches the model out of training mode.
model.train()

for epoch in range(NUM_EPOCHS):
    # Full-graph forward pass; only the "Review" node outputs are supervised.
    logits = model(g)["Review"]
    loss = F.cross_entropy(logits[train_idx], train_targets)
    opt.zero_grad()
    loss.backward()
    opt.step()
    print("Epoch:", epoch, "Loss:", loss.item())



Epoch: 0 Loss: 0.6938824653625488
Epoch: 1 Loss: 0.6721258163452148
Epoch: 2 Loss: 0.6499289870262146
Epoch: 3 Loss: 0.6240671277046204
Epoch: 4 Loss: 0.5933253765106201
Epoch: 5 Loss: 0.5572666525840759
Epoch: 6 Loss: 0.5162279605865479
Epoch: 7 Loss: 0.4714601933956146
Epoch: 8 Loss: 0.4250355660915375
Epoch: 9 Loss: 0.37954750657081604
Epoch: 10 Loss: 0.3377084732055664
Epoch: 11 Loss: 0.30186063051223755
Epoch: 12 Loss: 0.27343881130218506
Epoch: 13 Loss: 0.25256577134132385
Epoch: 14 Loss: 0.23805256187915802
Epoch: 15 Loss: 0.22785340249538422
Epoch: 16 Loss: 0.2197447121143341
Epoch: 17 Loss: 0.21191327273845673
Epoch: 18 Loss: 0.20328596234321594
Epoch: 19 Loss: 0.19355598092079163
Epoch: 20 Loss: 0.18298961222171783
Epoch: 21 Loss: 0.1721925437450409
Epoch: 22 Loss: 0.16190926730632782
Epoch: 23 Loss: 0.15282286703586578
Epoch: 24 Loss: 0.14549055695533752
Epoch: 25 Loss: 0.14025470614433289
Epoch: 26 Loss: 0.1369982808828354
Epoch: 27 Loss: 0.1351412832736969
Epoch: 28 Loss: 

In [26]:
model.eval()

# Select the held-out reviews once and reuse the mask for logits and labels.
test_idx = test_mask == 1

# Inference only: disable autograd so the full-graph forward pass does not
# build (and keep alive) a computation graph. Tensors produced under no_grad
# do not require grad, so they convert to NumPy without an explicit detach.
with torch.no_grad():
    review_logits = model(g)["Review"]

preds = review_logits[test_idx].numpy()
y_test = labels[test_idx]

In [27]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test reviews; the predicted class
# is the index of the largest logit in each row.
report = classification_report(y_test, preds.argmax(axis=1))
print(report)

              precision    recall  f1-score   support

         0.0       0.25      0.22      0.23      4750
         1.0       0.89      0.91      0.90     33786

    accuracy                           0.82     38536
   macro avg       0.57      0.56      0.57     38536
weighted avg       0.81      0.82      0.82     38536



In [28]:
from sklearn.metrics import f1_score

# F1 on the test reviews, using the row-wise argmax of the logits as the prediction.
f1_score(y_test, preds.argmax(axis=1))

0.8987780519327928

In [29]:
from sklearn.metrics import recall_score

# Recall on the test reviews, using the row-wise argmax of the logits as the prediction.
recall_score(y_test, preds.argmax(axis=1))

0.9056413899248209

In [30]:
from sklearn.metrics import precision_score

# Precision on the test reviews, using the row-wise argmax of the logits as the prediction.
precision_score(y_test, preds.argmax(axis=1))

0.8920179581365518

In [31]:
from sklearn.metrics import accuracy_score

# Overall accuracy on the test reviews, using the row-wise argmax of the logits as the prediction.
accuracy_score(y_test, preds.argmax(axis=1))

0.8211542453809425

In [32]:
# Pairwise dot-product scores between the learned Customer embeddings.
# The embedding is a trainable Parameter, so detach it first: this is
# post-training analysis and the result should not carry an autograd graph.
customer_emb = model.embed["Customer"].detach()
customerSim = customer_emb @ customer_emb.T

In [33]:
customerSim.shape

torch.Size([11041, 11041])

In [35]:
# Pairwise dot-product scores between the learned Review embeddings.
# Detach the trainable Parameter so the result does not carry an autograd graph.
# NOTE(review): the dense result is 77071 x 77071 (see the shape below), which
# is roughly 24 GB assuming float32 -- confirm this fits in memory before re-running.
review_emb = model.embed["Review"].detach()
reviewSim = review_emb @ review_emb.T
reviewSim.shape

torch.Size([77071, 77071])

In [38]:
# Row 100 of the Review-by-Review dot-product matrix: the scores of review 100
# against every review embedding.
reviewSim[100]

tensor([ 0.2834,  0.3500,  0.2826,  ..., -0.0085,  0.1574,  0.1536],
       grad_fn=<SelectBackward0>)