<a href="https://colab.research.google.com/github/tamara-kostova/IIS/blob/master/lab4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
!pip install torch
!pip install torch_geometric
!pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

Looking in links: https://data.pyg.org/whl/torch-2.2.0+cpu.html


In [30]:
import torch
from torch_geometric.nn import TransE, ComplEx


**Function for training**

In [31]:
def train(model, data_loader, optimizer, epochs=50):
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        total_examples = 0

        for head_index, rel_type, tail_index in data_loader:
            optimizer.zero_grad()
            loss = model.loss(head_index, rel_type, tail_index)
            loss.backward()
            optimizer.step()
            total_loss += float(loss) * head_index.numel()
            total_examples += head_index.numel()

        loss = total_loss / total_examples
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

**Function for evaluating**

In [32]:
def evaluate(model, data_loader):
    hits1_list = []
    hits3_list = []
    hits10_list = []
    mr_list = []
    mrr_list = []

    for head_index, rel_type, tail_index in data_loader:
        head_embeds = model.node_emb(head_index)
        relation_embeds = model.rel_emb(rel_type)
        tail_embeds = model.node_emb(tail_index)

        if isinstance(model, TransE):
            scores = torch.norm(head_embeds + relation_embeds - tail_embeds, p=1, dim=1)

        elif isinstance(model, ComplEx):
            # Get real and imaginary parts
            re_relation, im_relation = torch.chunk(relation_embeds, 2, dim=1)
            re_head, im_head = torch.chunk(head_embeds, 2, dim=1)
            re_tail, im_tail = torch.chunk(tail_embeds, 2, dim=1)

            # Compute scores
            re_score = re_head * re_relation - im_head * im_relation
            im_score = re_head * im_relation + im_head * re_relation
            scores = (re_score * re_tail + im_score * im_tail)

            # Negate as we want to rank scores in ascending order, lower the better
            scores = - scores.sum(dim=1)

        else:
            raise ValueError(f'Unsupported model.')

        scores = scores.view(-1, head_embeds.size()[0])

        hits1, hits3, hits10, mr, mrr = eval_metrics(scores)
        hits1_list.append(hits1.item())
        hits3_list.append(hits3.item())
        hits10_list.append(hits10.item())
        mr_list.append(mr.item())
        mrr_list.append(mrr.item())

    hits1 = sum(hits1_list) / len(hits1_list)
    hits3 = sum(hits3_list) / len(hits1_list)
    hits10 = sum(hits10_list) / len(hits1_list)
    mr = sum(mr_list) / len(hits1_list)
    mrr = sum(mrr_list) / len(hits1_list)

    return hits1, hits3, hits10, mr, mrr

**Evaluation metrics**

In [33]:
def eval_metrics(y_pred):
    argsort = torch.argsort(y_pred, dim=1, descending=False)
    # not using argsort to do the rankings to avoid bias when the scores are equal
    ranking_list = torch.nonzero(argsort == 0, as_tuple=False)
    ranking_list = ranking_list[:, 1] + 1
    hits1_list = (ranking_list <= 1).to(torch.float)
    hits3_list = (ranking_list <= 3).to(torch.float)
    hits10_list = (ranking_list <= 10).to(torch.float)
    mr_list = ranking_list.to(torch.float)
    mrr_list = 1. / ranking_list.to(torch.float)

    return hits1_list.mean(), hits3_list.mean(), hits10_list.mean(), mr_list.mean(), mrr_list.mean()

**Load data**

In [34]:
from torch_geometric.datasets import FB15k_237

train_data = FB15k_237('../data/FB15k', split='train')[0]
val_data = FB15k_237('../data/FB15k', split='val')[0]
test_data = FB15k_237('../data/FB15k', split='test')[0]


# **EXERCISE 1**

TransE Knowledge Graph

In [35]:
from torch.optim import Adam

model = TransE(num_nodes=train_data.num_nodes,
                   num_relations=train_data.num_edge_types,
                   hidden_channels=30)

loader = model.loader(head_index=train_data.edge_index[0],
                          rel_type=train_data.edge_type,
                          tail_index=train_data.edge_index[1],
                          batch_size=1000,
                          shuffle=True)

optimizer = Adam(model.parameters(), lr=0.01)

**Train model**

In [36]:
train(model, loader, optimizer)

Epoch: 000, Loss: 0.7868
Epoch: 001, Loss: 0.5689
Epoch: 002, Loss: 0.4145
Epoch: 003, Loss: 0.3054
Epoch: 004, Loss: 0.2433
Epoch: 005, Loss: 0.2080
Epoch: 006, Loss: 0.1863
Epoch: 007, Loss: 0.1734
Epoch: 008, Loss: 0.1623
Epoch: 009, Loss: 0.1535
Epoch: 010, Loss: 0.1476
Epoch: 011, Loss: 0.1440
Epoch: 012, Loss: 0.1388
Epoch: 013, Loss: 0.1360
Epoch: 014, Loss: 0.1321
Epoch: 015, Loss: 0.1302
Epoch: 016, Loss: 0.1274
Epoch: 017, Loss: 0.1246
Epoch: 018, Loss: 0.1221
Epoch: 019, Loss: 0.1216
Epoch: 020, Loss: 0.1196
Epoch: 021, Loss: 0.1177
Epoch: 022, Loss: 0.1163
Epoch: 023, Loss: 0.1141
Epoch: 024, Loss: 0.1129
Epoch: 025, Loss: 0.1109
Epoch: 026, Loss: 0.1118
Epoch: 027, Loss: 0.1112
Epoch: 028, Loss: 0.1090
Epoch: 029, Loss: 0.1077
Epoch: 030, Loss: 0.1075
Epoch: 031, Loss: 0.1059
Epoch: 032, Loss: 0.1053
Epoch: 033, Loss: 0.1052
Epoch: 034, Loss: 0.1042
Epoch: 035, Loss: 0.1032
Epoch: 036, Loss: 0.1040
Epoch: 037, Loss: 0.1027
Epoch: 038, Loss: 0.1017
Epoch: 039, Loss: 0.1021


**TransE Model Results**

In [37]:
hits1, hits3, hits10, mr, mrr = evaluate(model,loader)

In [38]:
print(f'Mean Rank: {mr:.2f}, Mean Reciprocal Rank: {mrr:.4f}, '
      f'Hits@1: {hits1:.4f}, Hits@3: {hits3:.4f}, Hits@10: {hits10:.4f}')

Mean Rank: 532.97, Mean Reciprocal Rank: 0.0084, Hits@1: 0.0037, Hits@3: 0.0037, Hits@10: 0.0073


# **EXERCISE 2**

In [39]:
model2 = ComplEx(num_nodes=train_data.num_nodes,
                   num_relations=train_data.num_edge_types,
                   hidden_channels=50)

loader2 = model2.loader(head_index=train_data.edge_index[0],
                          rel_type=train_data.edge_type,
                          tail_index=train_data.edge_index[1],
                          batch_size=1000,
                          shuffle=True)

optimizer2 = Adam(model2.parameters(), lr=0.01)

**Train model**

In [40]:
train(model2, loader2, optimizer2)

Epoch: 000, Loss: 0.5524
Epoch: 001, Loss: 0.3052
Epoch: 002, Loss: 0.1696
Epoch: 003, Loss: 0.1295
Epoch: 004, Loss: 0.1107
Epoch: 005, Loss: 0.1018
Epoch: 006, Loss: 0.0951
Epoch: 007, Loss: 0.0907
Epoch: 008, Loss: 0.0870
Epoch: 009, Loss: 0.0832
Epoch: 010, Loss: 0.0809
Epoch: 011, Loss: 0.0801
Epoch: 012, Loss: 0.0772
Epoch: 013, Loss: 0.0758
Epoch: 014, Loss: 0.0738
Epoch: 015, Loss: 0.0736
Epoch: 016, Loss: 0.0719
Epoch: 017, Loss: 0.0718
Epoch: 018, Loss: 0.0704
Epoch: 019, Loss: 0.0698
Epoch: 020, Loss: 0.0690
Epoch: 021, Loss: 0.0670
Epoch: 022, Loss: 0.0673
Epoch: 023, Loss: 0.0674
Epoch: 024, Loss: 0.0688
Epoch: 025, Loss: 0.0679
Epoch: 026, Loss: 0.0670
Epoch: 027, Loss: 0.0659
Epoch: 028, Loss: 0.0662
Epoch: 029, Loss: 0.0657
Epoch: 030, Loss: 0.0656
Epoch: 031, Loss: 0.0630
Epoch: 032, Loss: 0.0643
Epoch: 033, Loss: 0.0627
Epoch: 034, Loss: 0.0615
Epoch: 035, Loss: 0.0630
Epoch: 036, Loss: 0.0618
Epoch: 037, Loss: 0.0626
Epoch: 038, Loss: 0.0622
Epoch: 039, Loss: 0.0620


**ComplEx Model Results**

In [41]:
hits1, hits3, hits10, mr, mrr = evaluate(model2,loader2)

In [42]:
print(f'Mean Rank: {mr:.2f}, Mean Reciprocal Rank: {mrr:.4f}, '
      f'Hits@1: {hits1:.4f}, Hits@3: {hits3:.4f}, Hits@10: {hits10:.4f}')

Mean Rank: 485.72, Mean Reciprocal Rank: 0.0123, Hits@1: 0.0037, Hits@3: 0.0073, Hits@10: 0.0183


## **Conclusion**

The **ComplEx** model has slightly better MR and MRR scores which means it outperforms it on average (*The Mean Rank is the average rank of the first correct answer - so **lower** values are better, whereas Mean Reciprocial Rank is the average of the reciprocal ranks of the first correct answer for each query which means **higher** numbers are better*).


---


The **ComplEx** model also has higher Hits@3 and Hits@10 than the **ComplEx** model, indicating that it is better at recommending relevant items within the top 3 and top 10 predictions.