In [21]:
cd /content/drive/My Drive/Thesis/code/relation-prediction-2

/content/drive/My Drive/Thesis/code/relation-prediction-2


In [0]:
pip install torchkge --quiet

In [0]:
pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html --quiet

In [1]:
%load_ext autoreload
%autoreload 

In [2]:
import importlib

In [3]:
from tqdm import *
import tqdm

In [4]:
from layers import SpecialSpmmFinal

In [5]:
import train 
importlib.reload(train)
from train import *

In [6]:
import torch
import torch.nn as nn 

from functools import reduce
from operator import mul

from torch_scatter import scatter
from embedding import EmbeddingMul2

from sklearn.metrics.pairwise import pairwise_distances

In [7]:
kg_train, kg_test, kg_val = load_fb15k237()
args = Args(100, 200, 100, 2, 100, 2000, 0.001, 10, 'cuda', 'sgd')

In [8]:
n_ent, n_rel = kg_train.n_ent, kg_train.n_rel
total_triplets = get_valid_triplets(kg_train, kg_test, kg_val)

Number of unique triplets: 620232


In [9]:
dataloader = DataLoader(kg_train, batch_size=args.batch_size, shuffle=False, pin_memory=cuda.is_available())
ent_embed, rel_embed = get_init_embed()

In [38]:
class KGLayer(nn.Module):
    def __init__(self, n_entities, n_relations, ent_embed, rel_embed, in_dim, out_dim, concat=True, device="cuda"):
        super(KGLayer, self).__init__()

        self.n_entities = n_entities
        self.n_relations = n_relations
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.device = device

        self.a = nn.Linear(3 * in_dim, out_dim).to(device)
        nn.init.xavier_normal_(self.a.weight.data, gain=1.414)

        self.concat = concat

        self.a_2 = nn.Linear(out_dim, 1).to(device)
        nn.init.xavier_normal_(self.a_2.weight.data, gain=1.414)

        self.special_spmm_final = SpecialSpmmFinal()

        # self.ent_embed = EmbeddingMul2(n_entities, in_dim, ent_embed, True, torch.device(device))
        # self.rel_embed = EmbeddingMul2(n_relations, in_dim, rel_embed, True, torch.device(device))

        self.ent_embed = nn.Embedding(n_entities, in_dim).to(device)
        self.rel_embed = nn.Embedding(n_relations, in_dim).to(device)
        
        self.ent_embed.weight = nn.Parameter(torch.from_numpy(ent_embed))
        self.rel_embed.weight = nn.Parameter(torch.from_numpy(rel_embed)).to(device)
    
    def forward(self, triplets):
        N = self.n_entities

        h = torch.cat((
            self.ent_embed(triplets[:, 0]),
            self.ent_embed(triplets[:, 1]),
            self.rel_embed(triplets[:, 2])
        ), dim=1)
        c = self.a(h)
        b = F.leaky_relu(self.a_2(c))
        e_b = torch.exp(b)

        temp = triplets.t()
        edges = torch.stack([temp[0], temp[1]])

        ebs = self.special_spmm_final(edges, e_b, N, e_b.shape[0], 1)
        temp1 = e_b * c

        hs = self.special_spmm_final(edges, temp1,  N, e_b.shape[0], self.out_dim)

        ebs[ebs == 0] = 1e-12


        h_ent = hs / ebs

        index = triplets[:, 2]
        h_rel = scatter(temp1, index=index, dim=0, reduce="mean")

        # del h 
        # torch.cuda.empty_cache()

        if self.concat:
            return F.elu(h_ent), F.elu(h_rel)
        else:
            return h_ent, h_rel

In [39]:
model = KGLayer(n_ent, n_rel, ent_embed, rel_embed, 100, 100, True, "cuda")

TypeError: cannot assign 'torch.cuda.FloatTensor' as parameter 'weight' (torch.nn.Parameter or None expected)

In [33]:
optimizer = SGD(model.parameters(), lr=args.lr)

In [34]:
batches = [b for b in dataloader]

In [35]:
for epoch in tnrange(10):
    losses = []
    for i in tnrange(len(batches)):
        batch = batches[i]
        triplets = torch.stack(batch)
        triplets, labels, nodes, edges = negative_sampling(triplets, n_ent, args.negative_rate)
        triplets, labels = triplets.to(args.device), labels.to(args.device)
    
        model.zero_grad()
    
        # start = time.time()
        model.train()
        ent_embed_, rel_embed_ = model(triplets)
        loss = loss_func2(triplets, args.negative_rate, ent_embed_, rel_embed_, device="cuda")
        loss.backward()
        optimizer.step()
    
        # del triplets
        # del labels
        # del ent_embed_
        # del rel_embed_
        # del nodes 
        # del edges  
        torch.cuda.empty_cache()
    
        losses.append(loss.item())
        # print(loss.item())
        # del loss 
        torch.cuda.empty_cache()
    
    print(sum(losses) / len(losses))

  """Entry point for launching an IPython kernel.


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=137.0), HTML(value='')))

RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select

In [0]:
from typing import Set, Tuple
def validate(model: KGLayer, kg: KnowledgeGraph, total_triplets: Set[Tuple], batch_size=1000, device='cuda'):
    batch_size = 1


    n_ranks = 10
    k = kg.n_ent

    dataloader = DataLoader(kg, batch_size, shuffle=True)

    hits = [[] for _ in range(10)]
    ranks = [] 

    rankHs, rankTs = [], []

    head_rank_mean, tail_rank_mean, filtered_head_rank_mean, filtered_tail_rank_mean = [0] * 4

    head_hits_10_raw, head_hits_10_filter, tail_hits_10_raw, tail_hits_10_filter = [0] * 4

    data = [d for d in dataloader]

    for i in tnrange(300):
        batch = data[i]
        
        src, dst, rel = batch

        if device == 'cuda':
            src = src.to(device)
            dst = dst.to(device)
            rel = rel.to(device)

        src = model.ent_embed(src)
        dst = model.ent_embed(dst)
        rel = model.rel_embed(rel)

        loss = torch.norm(src + rel - dst, 2, 1)
        loss = loss.repeat(kg.n_ent)


        src_ = src.repeat(kg.n_ent, 1)
        dst_ = dst.repeat(kg.n_ent, 1)
        rel_ = rel.repeat(kg.n_ent, 1)

        dist_head_prediction = model.ent_embed.weight + rel_ - dst_
        dist_tail_prediction = src_ + rel_ - model.ent_embed.weight

        _, head_prediction = torch.topk(torch.sum(torch.abs(dist_head_prediction), dim=1), k=k)
        _, tail_prediction = torch.topk(torch.sum(torch.abs(dist_tail_prediction), dim=1), k=k)

        head_prediction = head_prediction.cpu()
        tail_prediction = tail_prediction.cpu()

        head_rank_raw, tail_rank_raw, head_rank_filter, tail_rank_filter = [0] * 4


        s, d, r = [b[0] for b in batch] 

        for candidate in head_prediction:
            if candidate == s:
                break
            else:
                head_rank_raw += 1 
                if (candidate, d, r) in total_triplets:
                    continue
                else:
                    head_rank_filter += 1
        
        for candidate in tail_prediction:
            if candidate == d:
                break 
            else:
                tail_rank_raw += 1
                if (s, candidate, r) in total_triplets:
                    continue
                else:
                    tail_rank_filter += 1 

        head_rank_mean += head_rank_raw
        tail_rank_mean += tail_rank_raw

        filtered_head_rank_mean += head_rank_filter
        filtered_tail_rank_mean += tail_rank_filter

        if head_rank_raw < 10:
            head_hits_10_raw += 1
        
        if tail_rank_raw < 10:
            tail_hits_10_raw += 1

        if head_rank_filter < 10:
            head_hits_10_filter += 1
        
        if tail_rank_filter < 10:
            tail_hits_10_filter += 1
        
    
    head_rank_mean /= 200
    tail_rank_mean /= 200

    filtered_head_rank_mean /= 200
    filtered_tail_rank_mean /= 200

    print(f'Head Rank Mean : {head_rank_mean} | Hits@10 : {head_hits_10_raw}')
    print(f'Tail Rank Mean : {tail_rank_mean} | Hits@10 : {tail_hits_10_raw}')

    print(f'Filtered Head Rank Mean: {filtered_head_rank_mean}')
    print(f'Filtered Tail Rank MEan: {filtered_tail_rank_mean}')


    print()


In [19]:
model.eval()
validate(model, kg_val, total_triplets, 100, 'cuda')



HBox(children=(IntProgress(value=0, max=300), HTML(value='')))


Head Rank Mean : 10696.24 | Hits@10 : 0
Tail Rank Mean : 10887.485 | Hits@10 : 0
Filtered Head Rank Mean: 10696.24
Filtered Tail Rank MEan: 10887.485



In [20]:
model.eval()

KGLayer(
  (a): Linear(in_features=300, out_features=100, bias=True)
  (a_2): Linear(in_features=100, out_features=1, bias=True)
  (special_spmm_final): SpecialSpmmFinal()
  (ent_embed): Embedding(14541, 100)
  (rel_embed): Embedding(237, 100)
)

In [21]:
ent_embed = model.ent_embed.weight.detach().cpu().numpy()
rel_embed = model.rel_embed.weight.detach().cpu().numpy()

In [23]:
ent_embed2 = model.ent_embed.weight.detach().cpu().numpy()
rel_embed2 = model.rel_embed.weight.detach().cpu().numpy()

In [26]:
ent_embed2[0]

array([ 0.480351  , -0.45701915,  1.4417819 , -1.5577084 , -1.0751399 ,
        0.93779457, -0.28031343,  0.6238077 , -0.23918626, -1.7936542 ,
        1.142066  , -0.11522719, -0.06288404, -0.00304152, -0.43539128,
        0.4698383 , -0.37051785,  0.48074928,  0.5917998 , -1.9509126 ,
       -1.3585271 ,  0.1316042 , -0.1388472 ,  0.697979  ,  0.3077121 ,
       -0.5928566 ,  1.4713794 , -1.3127495 , -0.77875924, -0.4356992 ,
       -0.8674456 , -1.625081  , -0.54463595, -2.0959022 ,  1.0299261 ,
        0.89777786, -0.20698252, -0.04713991,  0.7651625 , -0.7150702 ,
        0.45574683, -1.7639375 ,  0.07258008, -0.39445192,  1.454451  ,
       -0.36854097, -2.4826264 ,  1.1244382 ,  0.08095083,  0.66514915,
        0.75765675,  0.82752866, -0.2354866 , -0.24501884, -0.27691522,
        0.54642063, -0.37737754,  0.9660993 , -0.13023016, -1.4070644 ,
        0.7163284 ,  0.24646816,  0.850484  , -1.2625173 ,  0.97371674,
        0.7892326 , -1.7101912 ,  0.9557272 ,  1.2295837 ,  0.40

In [0]:
dl = DataLoader(kg_val, 1, shuffle=True)
data = [d for d in dl]
triplets = data[:20]

In [0]:
src_ = [triplet[0].item() for triplet in triplets]
dst_ = [triplet[1].item() for triplet in triplets]
rel_ = [triplet[2].item() for triplet in triplets]

src = ent_embed[src_]
dst = ent_embed[dst_]
rel = rel_embed[rel_]

In [0]:
dist = pairwise_distances(dst - rel, ent_embed, metric="manhattan")

In [0]:
rankArrayHead = np.argsort(dist, axis=1)

In [0]:
rankListHead =  [int(np.argwhere(e[1] == e[0])) for e in zip(src_, rankArrayHead)]

In [0]:
isHit10Head = [x for x in rankListHead if x < 10]

[0]

In [0]:
e = src_[0]
ee = rankArrayHead[0]

In [101]:
e

tensor([7814])

In [61]:
triplets[0]

[tensor([4576]), tensor([4409]), tensor([191])]

In [19]:
!nvidia-smi

'nvidia-smi' is not recognized as an internal or external command,
operable program or batch file.


In [21]:
dataloader = DataLoader(kg_val, 1, shuffle=True)
data = [d for d  in dataloader]
d = data[0]
model.eval()

KGLayer(
  (a): Linear(in_features=300, out_features=100, bias=True)
  (a_2): Linear(in_features=100, out_features=1, bias=True)
  (special_spmm_final): SpecialSpmmFinal()
  (ent_embed): Embedding(14541, 100)
  (rel_embed): Embedding(237, 100)
)

In [0]:
head = model.ent_embed(d[0].to("cuda"))
tail = model.ent_embed(d[1].to("cuda"))
rel  = model.rel_embed(d[2].to("cuda"))

In [0]:
target_loss = torch.norm(head + rel - tail, 2).repeat(n_ent, 1)

In [0]:
tmphead = head.repeat(n_ent, 1)
tmptail = tail.repeat(n_ent, 1)
tmprel = rel.repeat(n_ent, 1)

In [0]:
tmpHloss = torch.norm(model.ent_embed.weight + tmprel - tmptail, 2, 1).view(-1, 1)

In [26]:
tmpHloss

tensor([[18.1168],
        [19.2851],
        [17.9797],
        ...,
        [20.4873],
        [20.5492],
        [17.9054]], device='cuda:0', grad_fn=<ViewBackward>)

In [0]:
tmpTloss = torch.norm(tmphead + rel - model.ent_embed.weight, 2, 1).view(-1, 1)

In [28]:
tmpTloss

tensor([[18.8130],
        [19.0527],
        [18.2580],
        ...,
        [16.3002],
        [18.1240],
        [18.3511]], device='cuda:0', grad_fn=<ViewBackward>)

In [0]:
rankH = torch.nonzero(F.relu(target_loss - tmpHloss)).size()[0]

In [30]:
rankH

11112

In [0]:
rankT = torch.nonzero(F.relu(target_loss - tmpTloss)).size()[0]

In [32]:
rankT

14308

In [17]:
def temp_validate(model, kg, total_triplets, device):
    dataloader = DataLoader(kg, 1, shuffle=True)
    data = [d for d in dataloader]

    ranks = []

    for i in tnrange(200):
        triplet = data[i]
        if i:
            head = model.ent_embed(triplet[0].to("cuda"))
            tail = model.ent_embed(triplet[1].to("cuda"))
            rel  = model.rel_embed(triplet[2].to("cuda"))
    
            targetLoss = torch.norm(head + rel - tail, 2).repeat(kg.n_ent, 1)
    
            tmpHead = head.repeat(kg.n_ent, 1)
            tmpTail = tail.repeat(kg.n_ent, 1)
            tmpRel  = rel.repeat(kg.n_ent, 1)
    
            tmpHloss = torch.norm(model.ent_embed.weight + tmpRel - tmpTail, 2, 1).view(-1, 1)
            tmpTloss = torch.norm(tmpHead + tmpRel - model.ent_embed.weight, 2, 1).view(-1, 1)
    
            rankH = torch.nonzero(F.relu(targetLoss - tmpHloss)).size()[0]
            rankT = torch.nonzero(F.relu(targetLoss - tmpTloss)).size()[0]
    
            ranks.append( (rankH + rankT + 2) / 2 )
    
    return sum(ranks) / len(ranks)

In [18]:
temp_validate(model, kg_val, total_triplets, "cuda")

  import sys


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))




7188.693467336683

In [1]:
data

NameError: ignored