In [1]:
import dgl
import dgl.nn as dglnn
import torch.nn as nn
import torch.nn.functional as F
import torch


# Define a Heterograph Conv model

class RGCN(nn.Module):
    def __init__(self, emb_types, emb_size, hid_feats, out_feats, rel_names):
        super().__init__()
        # https://www.jianshu.com/p/767950b560c4
        embed_dict = {ntype : nn.Parameter(torch.Tensor(emb_types[ntype], emb_size))
                      for ntype in emb_types.keys()}
        for key, embed in embed_dict.items():
            nn.init.xavier_uniform_(embed)
        self.embed = nn.ParameterDict(embed_dict)
        self.conv1 = dglnn.HeteroGraphConv({
            rel: dglnn.GraphConv(emb_size, hid_feats)
            for rel in rel_names}, aggregate='sum')
        self.conv2 = dglnn.HeteroGraphConv({
            rel: dglnn.GraphConv(hid_feats, out_feats)
            for rel in rel_names}, aggregate='sum')

    def forward(self, graph):
        # inputs are features of nodes
        h = self.conv1(graph, self.embed)
        print(h)
        h = {k: F.relu(v) for k, v in h.items()}
        h = self.conv2(graph, h)
        return h

In [2]:
g = dgl.load_graphs("./graphs/industrial_and_scientific_5_core.dgl")[0][0]
g

Graph(num_nodes={'Brand': 1900, 'Customer': 11041, 'Product': 5334, 'Review': 77071},
      num_edges={('Brand', 'rev_SOLD_BY', 'Product'): 5555, ('Customer', 'WROTE', 'Review'): 77071, ('Product', 'SOLD_BY', 'Brand'): 5555, ('Product', 'rev_REVIEW_OF', 'Review'): 77071, ('Review', 'REVIEW_OF', 'Product'): 77071, ('Review', 'rev_WROTE', 'Customer'): 77071},
      metagraph=[('Brand', 'Product', 'rev_SOLD_BY'), ('Product', 'Brand', 'SOLD_BY'), ('Product', 'Review', 'rev_REVIEW_OF'), ('Customer', 'Review', 'WROTE'), ('Review', 'Product', 'REVIEW_OF'), ('Review', 'Customer', 'rev_WROTE')])

In [3]:
model = RGCN({x: g.number_of_nodes(x) for x in g.ntypes}, 512, 256, 2, g.etypes)

labels = g.nodes["Review"].data["Positive"]
train_mask = g.nodes["Review"].data["train_mask"]
test_mask = g.nodes["Review"].data["test_mask"]

In [4]:
opt = torch.optim.Adam(model.parameters())

for epoch in range(2):
    model.train()

    logits = model(g)["Review"]
    loss = F.cross_entropy(logits[train_mask == 1], labels[train_mask == 1].type(torch.long))
    opt.zero_grad()
    loss.backward()
    opt.step()
    print("Epoch:", epoch, "Loss:", loss.item())
    break

{'Brand': tensor([[ 0.0037,  0.0077, -0.0607,  ..., -0.0013,  0.0030, -0.0088],
        [ 0.0034,  0.0066, -0.0284,  ...,  0.0020,  0.0127,  0.0426],
        [-0.0022,  0.0029, -0.0073,  ...,  0.0356, -0.0299, -0.0025],
        ...,
        [ 0.0234, -0.0025, -0.0008,  ..., -0.0257, -0.0016,  0.0036],
        [ 0.0129,  0.0355, -0.0084,  ..., -0.0423,  0.0018, -0.0011],
        [-0.0307, -0.0008, -0.0171,  ...,  0.0137, -0.0144, -0.0030]],
       grad_fn=<SumBackward1>), 'Customer': tensor([[ 0.0024,  0.0092,  0.0089,  ...,  0.0051,  0.0074, -0.0017],
        [-0.0068,  0.0031,  0.0002,  ..., -0.0018,  0.0055,  0.0037],
        [ 0.0013,  0.0045, -0.0037,  ..., -0.0033, -0.0134, -0.0054],
        ...,
        [ 0.0067,  0.0032,  0.0035,  ..., -0.0046,  0.0015,  0.0016],
        [-0.0068, -0.0008, -0.0126,  ...,  0.0017,  0.0066,  0.0184],
        [-0.0057,  0.0047, -0.0077,  ...,  0.0022, -0.0037,  0.0034]],
       grad_fn=<SumBackward1>), 'Product': tensor([[-0.0072,  0.0053,  0.0118,



Epoch: 0 Loss: 0.6961133480072021


In [26]:
model.eval()
preds = model(g)["Review"][test_mask == 1].detach().numpy()
y_test = labels[test_mask == 1]

In [27]:
from sklearn.metrics import classification_report

print(classification_report(y_test, preds.argmax(1)))

              precision    recall  f1-score   support

         0.0       0.25      0.22      0.23      4750
         1.0       0.89      0.91      0.90     33786

    accuracy                           0.82     38536
   macro avg       0.57      0.56      0.57     38536
weighted avg       0.81      0.82      0.82     38536



In [28]:
from sklearn.metrics import f1_score
f1_score(y_test, preds.argmax(1))

0.8987780519327928

In [29]:
from sklearn.metrics import recall_score
recall_score(y_test, preds.argmax(1))

0.9056413899248209

In [30]:
from sklearn.metrics import precision_score
precision_score(y_test, preds.argmax(1))

0.8920179581365518

In [31]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, preds.argmax(1))

0.8211542453809425

In [32]:
customerSim = (model.embed["Customer"] @ model.embed["Customer"].T)

In [33]:
customerSim.shape

torch.Size([11041, 11041])

In [35]:
reviewSim = (model.embed["Review"] @ model.embed["Review"].T)
reviewSim.shape

torch.Size([77071, 77071])

In [38]:
reviewSim[100]

tensor([ 0.2834,  0.3500,  0.2826,  ..., -0.0085,  0.1574,  0.1536],
       grad_fn=<SelectBackward0>)