<a href="https://colab.research.google.com/github/tamara-kostova/IIS/blob/master/lab4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install torch
!pip install torch_geometric
!pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.2.0+cpu.html

Collecting torch_geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.5.3
Looking in links: https://data.pyg.org/whl/torch-2.2.0+cpu.html
Collecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcpu/torch_scatter-2.1.2%2Bpt22cpu-cp310-cp310-linux_x86_64.whl (508 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m508.1/508.1 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcpu/torch_sparse-0.6.18%2Bpt22cpu-cp310-cp310-linux_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m51.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_cluster
  Downloading https://data.pyg.org/whl/torch-2.2.0%2Bcpu/to

In [4]:
import torch
from torch_geometric.nn import TransE, ComplEx


**Function for training**

In [5]:
def train(model, data_loader, optimizer, epochs=5):
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        total_examples = 0

        for head_index, rel_type, tail_index in data_loader:
            optimizer.zero_grad()
            loss = model.loss(head_index, rel_type, tail_index)
            loss.backward()
            optimizer.step()
            total_loss += float(loss) * head_index.numel()
            total_examples += head_index.numel()

        loss = total_loss / total_examples
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

**Function for evaluating**

In [6]:
def evaluate(model, data_loader):
    hits1_list = []
    hits3_list = []
    hits10_list = []
    mr_list = []
    mrr_list = []

    for head_index, rel_type, tail_index in data_loader:
        head_embeds = model.node_emb(head_index)
        relation_embeds = model.rel_emb(rel_type)
        tail_embeds = model.node_emb(tail_index)

        if isinstance(model, TransE):
            scores = torch.norm(head_embeds + relation_embeds - tail_embeds, p=1, dim=1)

        elif isinstance(model, ComplEx):
            # Get real and imaginary parts
            re_relation, im_relation = torch.chunk(relation_embeds, 2, dim=1)
            re_head, im_head = torch.chunk(head_embeds, 2, dim=1)
            re_tail, im_tail = torch.chunk(tail_embeds, 2, dim=1)

            # Compute scores
            re_score = re_head * re_relation - im_head * im_relation
            im_score = re_head * im_relation + im_head * re_relation
            scores = (re_score * re_tail + im_score * im_tail)

            # Negate as we want to rank scores in ascending order, lower the better
            scores = - scores.sum(dim=1)

        else:
            raise ValueError(f'Unsupported model.')

        scores = scores.view(-1, head_embeds.size()[0])

        hits1, hits3, hits10, mr, mrr = eval_metrics(scores)
        hits1_list.append(hits1.item())
        hits3_list.append(hits3.item())
        hits10_list.append(hits10.item())
        mr_list.append(mr.item())
        mrr_list.append(mrr.item())

    hits1 = sum(hits1_list) / len(hits1_list)
    hits3 = sum(hits3_list) / len(hits1_list)
    hits10 = sum(hits10_list) / len(hits1_list)
    mr = sum(mr_list) / len(hits1_list)
    mrr = sum(mrr_list) / len(hits1_list)

    return hits1, hits3, hits10, mr, mrr

**Evaluation metrics**

In [7]:
def eval_metrics(y_pred):
    argsort = torch.argsort(y_pred, dim=1, descending=False)
    # not using argsort to do the rankings to avoid bias when the scores are equal
    ranking_list = torch.nonzero(argsort == 0, as_tuple=False)
    ranking_list = ranking_list[:, 1] + 1
    hits1_list = (ranking_list <= 1).to(torch.float)
    hits3_list = (ranking_list <= 3).to(torch.float)
    hits10_list = (ranking_list <= 10).to(torch.float)
    mr_list = ranking_list.to(torch.float)
    mrr_list = 1. / ranking_list.to(torch.float)

    return hits1_list.mean(), hits3_list.mean(), hits10_list.mean(), mr_list.mean(), mrr_list.mean()

**Load data**

In [8]:
from torch_geometric.datasets import FB15k_237

train_data = FB15k_237('../data/FB15k', split='train')[0]
val_data = FB15k_237('../data/FB15k', split='val')[0]
test_data = FB15k_237('../data/FB15k', split='test')[0]


Downloading https://raw.githubusercontent.com/villmow/datasets_knowledge_embedding/master/FB15k-237/train.txt
Downloading https://raw.githubusercontent.com/villmow/datasets_knowledge_embedding/master/FB15k-237/valid.txt
Downloading https://raw.githubusercontent.com/villmow/datasets_knowledge_embedding/master/FB15k-237/test.txt
Processing...
Done!


# **EXERCISE 1**

TransE Knowledge Graph

In [9]:
from torch.optim import Adam

model = TransE(num_nodes=train_data.num_nodes,
                   num_relations=train_data.num_edge_types,
                   hidden_channels=50)

loader = model.loader(head_index=train_data.edge_index[0],
                          rel_type=train_data.edge_type,
                          tail_index=train_data.edge_index[1],
                          batch_size=1000,
                          shuffle=True)

optimizer = Adam(model.parameters(), lr=0.01)

**Train model**

In [10]:
train(model, loader, optimizer)

Epoch: 000, Loss: 0.7595
Epoch: 001, Loss: 0.5575
Epoch: 002, Loss: 0.4357
Epoch: 003, Loss: 0.3495
Epoch: 004, Loss: 0.2951


**Results**

In [11]:
rank, mrr, hits10 = model.test(head_index=test_data.edge_index[0],
                                   rel_type=test_data.edge_type,
                                   tail_index=test_data.edge_index[1],
                                   batch_size=1000, k=10)

100%|██████████| 20466/20466 [22:33<00:00, 15.12it/s]


In [15]:
print('Results:')
print(f'Rank: {rank}, MRR: {mrr:.4f}, Hits@10: {hits10:.2f}')

Results:
Rank: 688.9366455078125, MRR: 0.1961, Hits@10: 0.33


# **EXERCISE 2**

In [12]:
model2 = ComplEx(num_nodes=train_data.num_nodes,
                   num_relations=train_data.num_edge_types,
                   hidden_channels=50)

loader2 = model2.loader(head_index=train_data.edge_index[0],
                          rel_type=train_data.edge_type,
                          tail_index=train_data.edge_index[1],
                          batch_size=1000,
                          shuffle=True)

optimizer2 = Adam(model.parameters(), lr=0.01)

**Train model**

In [13]:
train(model2, loader2, optimizer2)

Epoch: 000, Loss: 0.6931
Epoch: 001, Loss: 0.6931
Epoch: 002, Loss: 0.6931
Epoch: 003, Loss: 0.6931
Epoch: 004, Loss: 0.6931


**Results**

In [16]:
rank, mrr, hits10 = model2.test(head_index=test_data.edge_index[0],
                                   rel_type=test_data.edge_type,
                                   tail_index=test_data.edge_index[1],
                                   batch_size=1000, k=10)

100%|██████████| 20466/20466 [04:17<00:00, 79.57it/s]


In [17]:
print('Results:')
print(f'Rank: {rank}, MRR: {mrr:.4f}, Hits@10: {hits10:.2f}')

Results:
Rank: 7262.2880859375, MRR: 0.0007, Hits@10: 0.00
