<a href="https://colab.research.google.com/github/saiashirwad/relation-prediction-3/blob/master/RELATION_PREDICTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Clone the project only when it is not already checked out, so re-running this cell does not fail.
![ -d relation-prediction-3 ] || git clone https://github.com/saiashirwad/relation-prediction-3.git

fatal: destination path 'relation-prediction-3' already exists and is not an empty directory.


In [0]:
# Install torch-scatter (cu101 build) from the PyTorch Geometric wheel index for torch 1.5.0.
!pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html --quiet

In [3]:
# Show the GPU, driver and CUDA version available to this runtime.
!nvidia-smi

Tue Jun  2 07:11:58 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P8    31W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [4]:
# Work from inside the cloned repository so the local modules and data/ paths resolve.
%cd relation-prediction-3/

/content/relation-prediction-3


In [0]:
# %%
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
from torch.utils.data import DataLoader
import torch.optim as optim

from torch_scatter import scatter 

import numpy as np 

import os 

from layers import * 
from loss import * 
from evaluation import * 
from utils import * 
from dataloader import * 
from rotate import *

import IPython

import tqdm

In [0]:
class KGLayer(nn.Module):
    """One attention head that aggregates triplet features into entity and relation embeddings.

    Each triplet is turned into a feature vector from the concatenation of its
    (column 0 entity, column 1 relation, column 2 entity) embeddings. An
    unnormalised attention weight is computed per triplet, and the weighted
    features are aggregated per entity and averaged per relation.

    Args:
        n_entities: number of entities (rows of the entity table / output).
        n_relations: number of relations (rows of the relation table / output).
        in_dim: dimension of the input entity and relation embeddings.
        out_dim: dimension of the produced embeddings.
        input_drop: dropout probability applied to the normalised input features.
        margin, epsilon: used only to derive the uniform initialisation range.
        device: device the learnable sub-modules are moved to.
        concat: when True the layer owns its own embedding tables; when False
            the embeddings must be supplied to ``forward``.
    """

    def __init__(self, n_entities, n_relations, in_dim, out_dim, input_drop=0.5, 
                 margin=6.0, epsilon=2.0, device="cuda", concat=True):
        super().__init__()

        self.n_entities = n_entities
        self.n_relations = n_relations
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.device = device

        self.margin = margin 
        self.epsilon = epsilon

        # Projects the concatenated (entity, relation, entity) features to out_dim.
        self.a = nn.Linear(3 * in_dim, out_dim).to(device)
        nn.init.xavier_normal_(self.a.weight.data, gain=1.414)

        # Maps each projected triplet feature to a scalar attention logit.
        self.a_2 = nn.Linear(out_dim, 1).to(device)
        nn.init.xavier_normal_(self.a_2.weight.data, gain=1.414)

        # Defined in the project's `layers` module; presumably sums edge values
        # per first-row index of `edges` — TODO confirm against its implementation.
        self.sparse_neighborhood_aggregation = SparseNeighborhoodAggregation()

        self.concat = concat 

        if concat:
            # Both ranges share the same value; kept as frozen parameters so they
            # travel with the module's state.
            self.ent_embed_range = nn.Parameter(
                torch.Tensor([(self.margin + self.epsilon) / self.out_dim]), 
                requires_grad = False
            )
            
            self.rel_embed_range = nn.Parameter(
                torch.Tensor([(self.margin + self.epsilon) / self.out_dim]),
                requires_grad = False
            )
    
            self.ent_embed = nn.Embedding(n_entities, in_dim, max_norm=1, norm_type=2).to(device)
            self.rel_embed = nn.Embedding(n_relations, in_dim, max_norm=1, norm_type=2).to(device)
            
            nn.init.uniform_(self.ent_embed.weight.data, -self.ent_embed_range.item(), self.ent_embed_range.item())
            nn.init.uniform_(self.rel_embed.weight.data, -self.rel_embed_range.item(), self.rel_embed_range.item())

        self.input_drop = nn.Dropout(input_drop)

        self.bn0 = nn.BatchNorm1d(3 * in_dim).to(device)
        self.bn1 = nn.BatchNorm1d(out_dim).to(device)
    
    def forward(self, triplets, ent_embed=None, rel_embed=None):
        """Compute entity and relation embeddings from the given triplets.

        Args:
            triplets: 2-D long tensor whose columns 0, 1, 2 index an entity, a
                relation and an entity respectively.
            ent_embed, rel_embed: embedding tensors indexed by id; required
                only when the layer was built with ``concat=False``.

        Returns:
            ``(h_ent, h_rel)``: aggregated entity embeddings and per-relation
            mean embeddings (``n_relations`` rows).

        Raises:
            ValueError: if ``concat=False`` and the embeddings are not given.
        """
        if self.concat:
            h = torch.cat((
                self.ent_embed(triplets[:, 0]),
                self.rel_embed(triplets[:, 1]),
                self.ent_embed(triplets[:, 2])
            ), dim=1)
        else:
            if ent_embed is None or rel_embed is None:
                raise ValueError("ent_embed and rel_embed are required when concat=False")
            h = torch.cat((
                ent_embed[triplets[:, 0]],
                rel_embed[triplets[:, 1]],
                ent_embed[triplets[:, 2]]
            ), dim=1)

        h = self.input_drop(self.bn0(h))
        c = self.bn1(self.a(h))
        # Unnormalised attention weight per triplet: exp(-leaky_relu(logit)).
        b = -F.leaky_relu(self.a_2(c))
        e_b = torch.exp(b) 

        temp = triplets.t()
        edges = torch.stack([temp[0], temp[2]])
        n_edges = e_b.shape[0]

        # Denominator (sum of weights) and numerator (sum of weighted features).
        ebs = self.sparse_neighborhood_aggregation(edges, e_b, self.n_entities, n_edges, 1)
        weighted = e_b * c
        hs = self.sparse_neighborhood_aggregation(edges, weighted, self.n_entities, n_edges, self.out_dim)

        # Entities without any aggregated weight would divide by zero.
        ebs[ebs == 0] = 1e-12
        h_ent = hs / ebs 

        # dim_size pins the output to one row per relation; without it the
        # result has only max(index) + 1 rows and can be shorter than
        # n_relations when the highest relation ids are absent from `triplets`.
        h_rel = scatter(weighted, index=triplets[:, 1], dim=0,
                        dim_size=self.n_relations, reduce="mean")

        return h_ent, h_rel

In [0]:
class RotAttLayer(nn.Module):
    """Scores triplets by rotating the head by the relation phase in the complex plane.

    Entity embeddings are split in half along the last dimension into real and
    imaginary parts; relation embeddings are interpreted as phases. The score
    is ``margin`` minus the summed modulus of the difference between the
    rotated entity and the other entity.

    Args:
        n_ent, n_rel, out_dim, n_heads, input_drop, negative_rate, batch_size,
        device: accepted for interface compatibility; only stored (or ignored)
            and not used in the computation.
        in_dim: used to derive the phase scaling range.
        margin, epsilon: ``margin`` is the score offset; ``margin + epsilon``
            divided by ``in_dim`` is the phase scaling range.
    """

    def __init__(self, n_ent, n_rel, in_dim, out_dim, n_heads=1, input_drop=0.5, negative_rate = 10, margin=6.0, epsilon=2.0, batch_size=None, device="cuda"):
        super().__init__() 

        self.n_heads = n_heads 
        self.device = device

        self.in_dim = in_dim 
        self.out_dim = out_dim 
        self.margin = margin
        self.epsilon = epsilon
        self.batch_size = batch_size

        self.negative_rate = negative_rate 

        # Frozen scalar used to rescale relation embeddings into phases.
        self.embedding_range = nn.Parameter(
            torch.Tensor([(self.margin + self.epsilon) / in_dim]), 
            requires_grad=False
        )    

    def rotate(self, h, r, t, mode):
        """Return ``margin - sum(|rotated - target|)`` over the last dimension.

        Args:
            h, t: 3-D entity tensors whose last dimension is split in half into
                real and imaginary parts.
            r: relation tensor broadcastable against those halves.
            mode: ``'head-batch'`` rotates the tail backwards and compares with
                the head; any other value rotates the head and compares with
                the tail.
        """
        pi = 3.14159265358979323846

        re_head, im_head = torch.chunk(h, 2, dim=-1)
        re_tail, im_tail = torch.chunk(t, 2, dim=-1)

        # Rescale the relation embedding so that embedding_range maps to pi.
        phase_relation = r / (self.embedding_range.item() / pi) 
        
        re_relation = torch.cos(phase_relation)
        im_relation = torch.sin(phase_relation)
        
        if mode == 'head-batch':
            # Multiply the tail by the conjugate of the relation.
            re_score = re_relation * re_tail + im_relation * im_tail
            im_score = re_relation * im_tail - im_relation * re_tail
            re_score = re_score - re_head
            im_score = im_score - im_head
        else:
            # Multiply the head by the relation.
            re_score = re_head * re_relation - im_head * im_relation
            im_score = re_head * im_relation + im_head * re_relation
            re_score = re_score - re_tail
            im_score = im_score - im_tail

        # Modulus of each complex component, then summed per candidate.
        score = torch.stack([re_score, im_score], dim = 0)
        score = score.norm(dim = 0)

        score = self.margin - score.sum(dim = 2)
        return score

    def forward(self, sample, ent_embed, rel_embed, mode="single"):
        """Gather embeddings for ``sample`` and score them.

        Args:
            sample: for ``'single'`` a 2-D tensor with entity, relation, entity
                ids in columns 0, 1, 2; for ``'head-batch'`` / ``'tail-batch'``
                a pair ``(triplets, candidates)`` where ``candidates`` is a 2-D
                tensor of replacement head / tail entity ids per triplet.
            ent_embed: entity embedding tensor indexed along dim 0.
            rel_embed: relation embedding tensor indexed along dim 0.
            mode: ``'single'``, ``'head-batch'`` or ``'tail-batch'``.

        Returns:
            Score tensor with one row per triplet and one column per candidate
            (a single column in ``'single'`` mode).

        Raises:
            ValueError: if ``mode`` is not one of the supported values.
        """
        if mode == 'single':
            head = torch.index_select(ent_embed, dim=0, index=sample[:,0]).unsqueeze(1)
            relation = torch.index_select(rel_embed, dim=0, index=sample[:,1]).unsqueeze(1)
            tail = torch.index_select(ent_embed, dim=0, index=sample[:,2]).unsqueeze(1)
            
        elif mode == 'head-batch':
            tail_part, head_part = sample
            batch_size, negative_sample_size = head_part.size(0), head_part.size(1)
            
            head = torch.index_select(ent_embed, dim=0, index=head_part.view(-1)).view(batch_size, negative_sample_size, -1)      
            relation = torch.index_select(rel_embed, dim=0, index=tail_part[:, 1]).unsqueeze(1)
            tail = torch.index_select(ent_embed, dim=0, index=tail_part[:, 2]).unsqueeze(1)
            
        elif mode == 'tail-batch':
            head_part, tail_part = sample
            batch_size, negative_sample_size = tail_part.size(0), tail_part.size(1)

            head = torch.index_select(ent_embed, dim=0, index=head_part[:, 0]).unsqueeze(1)
            relation = torch.index_select(rel_embed, dim=0, index=head_part[:, 1]).unsqueeze(1)
            tail = torch.index_select(ent_embed, dim=0, index=tail_part.view(-1)).view(batch_size, negative_sample_size, -1)

        else:
            # Previously an unknown mode fell through and failed later with an
            # UnboundLocalError on `head`.
            raise ValueError(f"mode {mode} is not supported")
            
        return self.rotate(head, relation, tail, mode)

class RotAtte(nn.Module):
    """Multi-head KG attention encoder followed by a rotation-based scorer.

    Each head (a ``KGLayer``) produces entity and relation embeddings from the
    full triplet set. The heads are concatenated, linearly projected, masked
    to the entities that occur in the current sample, and scored by
    ``RotAttLayer``.

    Args:
        n_ent, n_rel: number of entities / relations.
        in_dim, out_dim: input and output embedding dimensions of each head.
        n_heads: number of ``KGLayer`` heads.
        input_drop: dropout probability forwarded to each head.
        negative_rate, margin, epsilon, batch_size: forwarded to the scorer
            (``margin`` and ``epsilon`` also to the heads).
        device: device used for every sub-module.
    """

    def __init__(self, n_ent, n_rel, in_dim, out_dim, n_heads=1, input_drop=0.5, negative_rate = 10, margin=6.0, epsilon=2.0, batch_size=None, device="cuda"):
        super().__init__()

        self.n_ent = n_ent 
        self.n_rel = n_rel 
        self.in_dim = in_dim 
        self.out_dim = out_dim
        self.n_heads = n_heads

        self.device = device

        # `device` is forwarded explicitly: the heads used to fall back to
        # their own "cuda" default regardless of what this model was given.
        self.a = nn.ModuleList([
            KGLayer(
                n_ent, n_rel, in_dim, out_dim, input_drop, margin=margin, epsilon=epsilon, device=device
            )
        for _ in range(self.n_heads)])

        self.rotate = RotAttLayer(n_ent, n_rel, in_dim, out_dim, n_heads=1, input_drop=0.5, negative_rate = negative_rate, margin=margin, epsilon=epsilon, batch_size=batch_size, device=device) 

        self.ent_transform = nn.Linear(n_heads * out_dim, out_dim).to(device)
        # Relations are projected to half the entity width.
        self.rel_transform = nn.Linear(n_heads * out_dim, out_dim // 2).to(device)
    
    def forward(self, sample, triplets, mode="single"):
        """Score ``sample`` using embeddings computed from ``triplets``.

        Args:
            sample: for ``'single'`` a 2-D tensor with entity ids in columns 0
                and 2; for ``'head-batch'`` / ``'tail-batch'`` a pair
                ``(positive_triplets, candidate_entity_ids)``.
            triplets: triplet tensor passed to every attention head.
            mode: ``'single'``, ``'head-batch'`` or ``'tail-batch'``.

        Returns:
            The score tensor produced by the scorer.

        Raises:
            ValueError: if ``mode`` is not supported.
        """
        # Collect every entity id the scorer will look up for this sample.
        if mode == 'single':
            touched = torch.cat([sample[:, 0], sample[:, 2]])
        elif mode in ('head-batch', 'tail-batch'):
            # Both batch modes receive (positive triplets, candidate entities),
            # matching how the scorer unpacks `sample`. The head-batch branch
            # used to index the pair the other way round, which left most
            # candidate heads masked out and counted relation ids as entities.
            positive_part, candidate_part = sample
            touched = torch.cat([positive_part[:, 0], positive_part[:, 2], candidate_part.flatten()])
        else:
            raise ValueError(f"mode {mode} is not supported")

        out = [a(triplets) for a in self.a]
        ent_embed = self.ent_transform(torch.cat([o[0] for o in out], dim=1))
        rel_embed = self.rel_transform(torch.cat([o[1] for o in out], dim=1))

        # Zero every entity row that does not take part in this sample.
        mask_indices = torch.unique(touched)
        mask = torch.zeros(self.n_ent, device=ent_embed.device)
        mask[mask_indices] = 1.0
        ent_embed = mask.unsqueeze(-1).expand_as(ent_embed) * ent_embed 
        score = self.rotate(sample, ent_embed, rel_embed, mode)

        return score 
    
    def regularization(self):
        """Placeholder for a regularisation term; currently does nothing."""
        pass 

In [0]:
class RotAttTrainer:
    """Loads a knowledge-graph dataset and trains / evaluates a ``RotAtte`` model.

    Args:
        name: checkpoint file name inside ``checkpoint_dir``.
        model: model to train; a fresh ``RotAtte`` is built when ``None``.
        dataset: sub-directory of ``data/`` holding ``entities.dict``,
            ``relations.dict``, ``train.txt``, ``valid.txt`` and ``test.txt``.
        n_epochs: stored on the instance; not used by the visible training loop.
        batch_size: training batch size.
        device: device used for the model, the fact tensor and every batch.
        optim_: ``"adam"`` selects Adam; any other value selects SGD.
        lr: learning rate.
        checkpoint_dir: directory checkpoints are written to / read from.
    """

    def __init__(self, name="lol", model = None, dataset="FB15k-237", n_epochs=1000, batch_size=2000, device="cuda", 
        optim_ = "sgd", lr = 0.001, checkpoint_dir="checkpoints"):
        self.name = name

        in_dim = 500
        out_dim = 500
        
        self.work_threads = 4 
        self.lr = lr 
        self.weight_decay = None
        self.n_epochs = n_epochs
        self.device = device
        self.adversarial_temperature = 1.0
        self.negative_sample_size = 10
        self.batch_size = batch_size
        self.test_batch_size = 100
    
        self._load_data(dataset)
        self.checkpoint_dir = checkpoint_dir

        if model is None:
            # Forward `device` so the model does not silently default to "cuda".
            self.model = RotAtte(self.n_ent, self.n_rel, in_dim, out_dim, batch_size=batch_size, device=device)
        else:
            self.model = model

        # `optim_` used to be ignored; SGD stays the default / fallback.
        if str(optim_).lower() == "adam":
            self.optimizer = optim.Adam(self.model.parameters(), lr)
        else:
            self.optimizer = optim.SGD(self.model.parameters(), lr)
    
    def save_model(self):
        """Write the model's state dict to ``<checkpoint_dir>/<name>``."""
        if self.checkpoint_dir:
            # torch.save does not create missing parent directories.
            os.makedirs(self.checkpoint_dir, exist_ok=True)
        torch.save(self.model.state_dict(), f"{self.checkpoint_dir}/{self.name}")
    
    def load_model(self):
        """Load the state dict from ``<checkpoint_dir>/<name>`` onto ``self.device``."""
        state = torch.load(f"{self.checkpoint_dir}/{self.name}", map_location=self.device)
        self.model.load_state_dict(state)

    @staticmethod
    def _read_id_map(path):
        """Parse a ``<id>\\t<label>`` file into a ``{label: int(id)}`` dict."""
        mapping = dict()
        with open(path) as fin:
            for line in fin:
                idx, label = line.strip().split('\t')
                mapping[label] = int(idx)
        return mapping

    def _make_train_loader(self, mode):
        """Build the shuffled training DataLoader for one corruption mode."""
        return DataLoader(
            TrainDataset(self.train_triplets, self.n_ent, self.n_rel, self.negative_sample_size, mode), 
            batch_size=self.batch_size,
            shuffle=True, 
            collate_fn=TrainDataset.collate_fn
        )

    def _make_test_loader(self, mode):
        """Build the evaluation DataLoader for one corruption mode."""
        return DataLoader(
            TestDataset(
                self.test_triplets,
                self.all_true_triplets, 
                self.n_ent, 
                self.n_rel,
                mode
            ),
            batch_size=self.test_batch_size,
            num_workers=1,
            collate_fn=TestDataset.collate_fn
        )
        
    def _load_data(self, dataset):
        """Read id maps and triplet files from ``data/<dataset>`` and build the training iterator."""
        data_path = f"data/{dataset}"
        entity2id = self._read_id_map(os.path.join(data_path, 'entities.dict'))
        relation2id = self._read_id_map(os.path.join(data_path, 'relations.dict'))

        self.n_ent = len(entity2id)
        self.n_rel = len(relation2id)

        self.train_triplets = read_triple(os.path.join(data_path, 'train.txt'), entity2id, relation2id)
        self.valid_triplets = read_triple(os.path.join(data_path, 'valid.txt'), entity2id, relation2id)
        self.test_triplets = read_triple(os.path.join(data_path, 'test.txt'), entity2id, relation2id)
        self.all_true_triplets = self.train_triplets + self.valid_triplets + self.test_triplets

        # Build the id tensor directly as int64: going through torch.Tensor
        # (float32) first cannot represent every integer above 2**24 exactly.
        self.facts = torch.tensor(self.train_triplets, dtype=torch.long, device=self.device)

        self.train_iterator = BidirectionalOneShotIterator(
            self._make_train_loader('head-batch'),
            self._make_train_loader('tail-batch'),
        )
    
    def train_one_step(self):
        """Run one optimisation step on the next batch.

        Returns:
            ``(positive_loss, negative_loss, loss)`` as tensors.
        """
        self.model.train() 
        self.optimizer.zero_grad()
        
        positive_sample, negative_sample, subsampling_weight, mode = next(self.train_iterator)
        positive_sample = positive_sample.to(self.device)
        negative_sample = negative_sample.to(self.device)
        subsampling_weight = subsampling_weight.to(self.device)
        
        # Self-adversarial weighting: the softmax weights are detached so they
        # act as constants in the gradient.
        negative_score = self.model((positive_sample, negative_sample), self.facts, mode=mode)
        negative_score = (F.softmax(negative_score * self.adversarial_temperature, dim = 1).detach() 
                              * F.logsigmoid(-negative_score)).sum(dim = 1)
        
        positive_score = self.model(positive_sample, self.facts)
        positive_score = F.logsigmoid(positive_score).squeeze(dim=1)
        
        # Non-uniform weights per training triplet.
        weight_total = subsampling_weight.sum()
        positive_loss = -(subsampling_weight * positive_score).sum() / weight_total
        negative_loss = -(subsampling_weight * negative_score).sum() / weight_total
        
        loss = (positive_loss + negative_loss) / 2 
        self.model.regularization() # not implemented yet
        loss.backward()
        self.optimizer.step()
        
        return positive_loss, negative_loss, loss
    
    def test(self):
        """Evaluate on the test triplets in head-batch and tail-batch mode.

        Returns:
            Dict with the mean of ``MRR``, ``MR``, ``HITS@1``, ``HITS@3`` and
            ``HITS@10`` over all ranked samples; empty if nothing was ranked.
        """
        self.model.eval() 

        test_dataset_list = [
            self._make_test_loader('head-batch'),
            self._make_test_loader('tail-batch'),
        ]
        logs = [] 

        with torch.no_grad():
            for test_dataset in test_dataset_list:
                for positive_sample, negative_sample, filter_bias, mode in test_dataset:
                    # Use the configured device rather than hard-coding CUDA.
                    positive_sample = positive_sample.to(self.device)
                    negative_sample = negative_sample.to(self.device)
                    filter_bias = filter_bias.to(self.device)

                    batch_size = positive_sample.size(0)

                    score = self.model((positive_sample, negative_sample), self.facts, mode)
                    score += filter_bias 

                    argsort = torch.argsort(score, dim=1, descending=True)

                    if mode == 'head-batch':
                        positive_arg = positive_sample[:, 0]
                    elif mode == 'tail-batch':
                        positive_arg = positive_sample[:, 2]
                    else:
                        raise ValueError(f"mode {mode} is not supported")
                    
                    # Locate every true entity in its sorted row at once; each
                    # row is a permutation, so exactly one hit per row is
                    # expected and nonzero() returns them in row order.
                    hits = (argsort == positive_arg.unsqueeze(1)).nonzero()
                    assert hits.size(0) == batch_size

                    for ranking in (hits[:, 1] + 1).tolist():
                        logs.append({
                            'MRR': 1.0/ranking,
                            'MR': float(ranking),
                            'HITS@1': 1.0 if ranking <= 1 else 0.0,
                            'HITS@3': 1.0 if ranking <= 3 else 0.0,
                            'HITS@10': 1.0 if ranking <= 10 else 0.0,
                        })

        metrics = {}
        if logs:
            for metric in logs[0].keys():
                metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
        
        print(metrics)
        return metrics 
    
    def run(self, max_steps=10000, log_every=100, resume=False):
        """Train for ``max_steps`` steps, logging and checkpointing every ``log_every`` steps.

        Args:
            max_steps: number of optimisation steps.
            log_every: number of steps between log lines / checkpoints.
            resume: load the saved checkpoint before training.
        """
        if resume:
            self.load_model()
        self.model.train()
        # TODO: handle lr dynamically for Adam
        avg_loss, avg_pos_loss, avg_neg_loss = 0.0, 0.0, 0.0
        for step in tqdm.tnrange(max_steps):
            positive_loss, negative_loss, loss = self.train_one_step()
            # Accumulate plain floats: summing the loss tensors keeps their
            # autograd history alive across steps.
            avg_loss += float(loss)
            avg_pos_loss += float(positive_loss)
            avg_neg_loss += float(negative_loss)
            
            # Log once a full window of `log_every` steps has been summed, so
            # the printed value is a real average (step 0 used to be logged
            # after a single step but still divided by `log_every`).
            if (step + 1) % log_every == 0:
                print(f'Step: {step + 1}')
                print(f'Positive Loss: {avg_pos_loss / log_every} ')
                print(f'Negative Loss: {avg_neg_loss / log_every}')
                print(f'Loss:        : {avg_loss / log_every}')
                avg_loss, avg_pos_loss, avg_neg_loss = 0.0, 0.0, 0.0

                self.save_model()
        self.save_model()
            

In [0]:
# Build the trainer. Checkpoints go to the folder below, which lives under
# /content/drive — presumably Google Drive has to be mounted first; confirm.
trainer = RotAttTrainer(
    name="10k_1000_dim",
    batch_size=5000,
    checkpoint_dir="/content/drive/My Drive/Relation Prediction Train",
)

In [0]:
# Plain tracebacks plus automatic post-mortem debugging while training runs.
%xmode Plain
%pdb on
trainer.run(max_steps=5000, log_every=1000)

Exception reporting mode: Plain
Automatic pdb calling has been turned ON




HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))

RuntimeError: ignored

> [0;32m/usr/local/lib/python3.6/dist-packages/torch/autograd/__init__.py[0m(100)[0;36mbackward[0;34m()[0m
[0;32m     98 [0;31m    Variable._execution_engine.run_backward(
[0m[0;32m     99 [0;31m        [0mtensors[0m[0;34m,[0m [0mgrad_tensors[0m[0;34m,[0m [0mretain_graph[0m[0;34m,[0m [0mcreate_graph[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 100 [0;31m        allow_unreachable=True)  # allow_unreachable flag
[0m[0;32m    101 [0;31m[0;34m[0m[0m
[0m[0;32m    102 [0;31m[0;34m[0m[0m
[0m
ipdb> !nvidia-smi
*** NameError: name 'nvidia' is not defined
ipdb> l
[1;32m     95 [0m    [0;32mif[0m [0mretain_graph[0m [0;32mis[0m [0;32mNone[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[1;32m     96 [0m        [0mretain_graph[0m [0;34m=[0m [0mcreate_graph[0m[0;34m[0m[0;34m[0m[0m
[1;32m     97 [0m[0;34m[0m[0m
[1;32m     98 [0m    Variable._execution_engine.run_backward(
[1;32m     99 [0m        [0mtensors[0m[0;34m,[0m [0

In [0]:
# Evaluate the trained model on the test split.
%xmode Plain 
%pdb on 
# Release cached GPU memory held by PyTorch's allocator before evaluation.
torch.cuda.empty_cache()
trainer.test()