In [1]:
# Ravi Patel
# Objective: Experiment with TorchKGE to recreate results and translate portions to run with Metal.
# Reference paper: https://papers.nips.cc/paper_files/paper/2013/file/1cecc7a77928ca8133fa24680a88d2f9-Paper.pdf
from torch.optim import Adam
import torch
from torch import tensor, bernoulli, randint, ones, rand, cat

from torchkge.utils.datasets import load_fb15k
from torchkge.models import ComplExModel, TransEModel
from torchkge.utils import MarginLoss, DataLoader
from torchkge.sampling import BernoulliNegativeSampler
from torchkge.evaluation import LinkPredictionEvaluator
from torchkge.evaluation import TripletClassificationEvaluator
from torchkge.utils.data import get_n_batches
from torchkge.sampling import NegativeSampler, get_bernoulli_probs

device = torch.device('mps')

  from tqdm.autonotebook import tqdm


In [2]:
train, val, test = load_fb15k('.')

In [3]:
train.get_df()

Unnamed: 0,from,to,rel
0,/m/027rn,/m/06cx9,/location/country/form_of_government
1,/m/017dcd,/m/06v8s0,/tv/tv_program/regular_cast./tv/regular_tv_app...
2,/m/07s9rl0,/m/0170z3,/media_common/netflix_genre/titles
3,/m/01sl1q,/m/044mz_,/award/award_winner/awards_won./award/award_ho...
4,/m/0cnk2q,/m/02nzb8,/soccer/football_team/current_roster./sports/s...
...,...,...,...
483137,/m/0gpx6,/m/0gq6s3,/award/award_nominated_work/award_nominations....
483138,/m/020jqv,/m/09d3b7,/award/award_nominee/award_nominations./award/...
483139,/m/0524b41,/m/0lp_cd3,/award/award_winning_work/awards_won./award/aw...
483140,/m/0kvsb,/m/050xpd,/people/person/education./education/education/...


In [4]:
test.get_df()

Unnamed: 0,from,to,rel
0,/m/01qscs,/m/02x8n1n,/award/award_nominee/award_nominations./award/...
1,/m/040db,/m/0148d,/base/activism/activist/area_of_activism
2,/m/08966,/m/05lf_,/travel/travel_destination/climate./travel/tra...
3,/m/01hww_,/m/01q99h,/music/performance_role/regular_performances./...
4,/m/0c1pj,/m/019f4v,/award/award_nominee/award_nominations./award/...
...,...,...,...
59066,/m/0727h,/m/0bk25,/military/military_conflict/combatants./milita...
59067,/m/01_njt,/m/06czyr,/award/award_nominee/award_nominations./award/...
59068,/m/07zhd7,/m/01l2m3,/people/deceased_person/cause_of_death
59069,/m/016ntp,/m/0155w,/music/artist/genre


In [5]:
model = TransEModel(100, train.n_ent, train.n_rel).to(device)
next(model.parameters()).device
# Verify model has been moved to GPU

device(type='mps', index=0)

In [6]:
model

TransEModel(
  (ent_emb): Embedding(14951, 100)
  (rel_emb): Embedding(1345, 100)
)

In [7]:
# Move loss to GPU
criterion = MarginLoss(0.5).to(device)
optimizer = Adam(model.parameters(), lr=0.003, weight_decay=1e-5)

In [8]:
# need to create new data loader and sampler so we can mps for mac
class DataLoader:
    """This class is inspired from :class:`torch.utils.dataloader.DataLoader`.
    It is however way simpler.

    """
    def __init__(self, kg, batch_size, use_mps=None):
        """

        Parameters
        ----------
        kg: torchkge.data_structures.KnowledgeGraph or torchkge.data_structures.SmallKG
            Knowledge graph in the form of an object implemented in
            torchkge.data_structures.
        batch_size: int
            Size of the required batches.
        use_cuda: str (opt, default = None)
            Can be either None (no use of cuda at all), 'all' to move all the
            dataset to cuda and then split in batches or 'batch' to simply move
            the batches to cuda before they are returned.
        """
        self.h = kg.head_idx
        self.t = kg.tail_idx
        self.r = kg.relations

        self.use_mps = use_mps
        self.batch_size = batch_size

        if use_mps:
            device = torch.device('mps')
            self.h = self.h.to(device)
            self.t = self.t.to(device)
            self.r = self.r.to(device)

    def __len__(self):
        return get_n_batches(len(self.h), self.batch_size)

    def __iter__(self):
        return _DataLoaderIter(self)


class _DataLoaderIter:
    def __init__(self, loader):
        self.h = loader.h
        self.t = loader.t
        self.r = loader.r

        self.use_mps = loader.use_mps
        self.batch_size = loader.batch_size

        self.n_batches = get_n_batches(len(self.h), self.batch_size)
        self.current_batch = 0

    def __next__(self):
        if self.current_batch == self.n_batches:
            raise StopIteration
        else:
            i = self.current_batch
            self.current_batch += 1

            tmp_h = self.h[i * self.batch_size: (i + 1) * self.batch_size]
            tmp_t = self.t[i * self.batch_size: (i + 1) * self.batch_size]
            tmp_r = self.r[i * self.batch_size: (i + 1) * self.batch_size]

            if self.use_mps:
                return tmp_h.to(device), tmp_t.to(device), tmp_r.to(device)
            else:
                return tmp_h, tmp_t, tmp_r

    def __iter__(self):
        return self

class BernoulliNegativeSampler(NegativeSampler):
    """Bernoulli negative sampler as presented in 2014 paper by Wang et al..
    Either the head or the tail of a triplet is replaced by another entity at
    random. The choice of head/tail is done using probabilities taking into
    account profiles of the relations. See the paper for more details. This
    class inherits from the
    :class:`torchkge.sampling.NegativeSampler` interface.
    It then has its attributes as well.

    References
    ----------
    * Zhen Wang, Jianwen Zhang, Jianlin Feng, and Zheng Chen.
      Knowledge Graph Embedding by Translating on Hyperplanes.
      In Twenty-Eighth AAAI Conference on Artificial Intelligence, June 2014.
      https://www.aaai.org/ocs/index.php/AAAI/AAAI14/paper/view/8531

    Parameters
    ----------
    kg: torchkge.data_structures.KnowledgeGraph
        Main knowledge graph (usually training one).
    kg_val: torchkge.data_structures.KnowledgeGraph (optional)
        Validation knowledge graph.
    kg_test: torchkge.data_structures.KnowledgeGraph (optional)
        Test knowledge graph.
    n_neg: int
        Number of negative sample to create from each fact.
    Attributes
    ----------
    bern_probs: torch.Tensor, dtype: torch.float, shape: (kg.n_rel)
        Bernoulli sampling probabilities. See paper for more details.

    """

    def __init__(self, kg, kg_val=None, kg_test=None, n_neg=1):
        super().__init__(kg, kg_val, kg_test, n_neg)
        self.bern_probs = self.evaluate_probabilities()

    def evaluate_probabilities(self):
        """Evaluate the Bernoulli probabilities for negative sampling as in the
        TransH original paper by Wang et al. (2014).
        """
        bern_probs = get_bernoulli_probs(self.kg)

        tmp = []
        for i in range(self.kg.n_rel):
            if i in bern_probs.keys():
                tmp.append(bern_probs[i])
            else:
                tmp.append(0.5)

        return tensor(tmp).float()

    def corrupt_batch(self, heads, tails, relations, n_neg=None):
        """For each true triplet, produce a corrupted one assumed to be different
        from any other true triplet. If `heads` and `tails` are cuda objects,
        then the returned tensors are on the GPU.

        Parameters
        ----------
        heads: torch.Tensor, dtype: torch.long, shape: (batch_size)
            Tensor containing the integer key of heads of the relations in the
            current batch.
        tails: torch.Tensor, dtype: torch.long, shape: (batch_size)
            Tensor containing the integer key of tails of the relations in the
            current batch.
        relations: torch.Tensor, dtype: torch.long, shape: (batch_size)
            Tensor containing the integer key of relations in the current
            batch.
        n_neg: int (opt)
            Number of negative sample to create from each fact. It overwrites
            the value set at the construction of the sampler.
        Returns
        -------
        neg_heads: torch.Tensor, dtype: torch.long, shape: (batch_size)
            Tensor containing the integer key of negatively sampled heads of
            the relations in the current batch.
        neg_tails: torch.Tensor, dtype: torch.long, shape: (batch_size)
            Tensor containing the integer key of negatively sampled tails of
            the relations in the current batch.
        """
        if n_neg is None:
            n_neg = self.n_neg

        device = heads.device
        assert (device == tails.device)

        batch_size = heads.shape[0]
        neg_heads = heads.repeat(n_neg)
        neg_tails = tails.repeat(n_neg)

        # Randomly choose which samples will have head/tail corrupted
        self.bern_probs = self.bern_probs.to(device)
        mask = bernoulli(self.bern_probs[relations].repeat(n_neg))
        n_h_cor = int(mask.sum().item())
        neg_heads[mask == 1] = randint(1, self.n_ent,
                                       (n_h_cor,),
                                       device=device)
        neg_tails[mask == 0] = randint(1, self.n_ent,
                                       (batch_size * n_neg - n_h_cor,),
                                       device=device)

        return neg_heads.long(), neg_tails.long()

In [9]:
# Sampler adds curruption to each batch creating positive and negative training values. 
sampler = BernoulliNegativeSampler(train)
dataloader = DataLoader(train, batch_size=128, use_mps=True)

In [10]:
for epoch in range(10):
    running_loss = 0.0
    for i, batch in enumerate(dataloader):
        h, t, r = batch[0], batch[1], batch[2]
        n_h, n_t = sampler.corrupt_batch(h, t, r)

        optimizer.zero_grad()

        # get preds from model with inputs head, tail, relation, negative head, and negative tail
        pos, neg = model(h, t, r, n_h, n_t)
        # forword
        loss = criterion(pos, neg)
        # backward
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    model.normalize_parameters()
    # per paper, normalize weights
    print(
        'Epoch {} | mean loss: {:.5f}'.format(epoch + 1,
                                              running_loss / len(dataloader)))

Epoch 1 | mean loss: 11.79292
Epoch 2 | mean loss: 4.03566
Epoch 3 | mean loss: 3.70182
Epoch 4 | mean loss: 3.56905
Epoch 5 | mean loss: 3.39074
Epoch 6 | mean loss: 3.27700
Epoch 7 | mean loss: 3.14225
Epoch 8 | mean loss: 3.16809
Epoch 9 | mean loss: 3.10829
Epoch 10 | mean loss: 3.05974


In [11]:
# Manually move the values to the gpu, because the LinkPredictionEvaluator assumes 
# input data will be KnowledgeGraph type.
test.head_idx = test.head_idx.to(device)
test.tail_idx = test.tail_idx.to(device)
test.relations = test.relations.to(device)

evaluator = LinkPredictionEvaluator(model, test)
evaluator.evaluate(b_size=32)
evaluator.print_results()

Link prediction evaluation: 100%|███████████████████████████████████████████████| 1846/1846 [02:31<00:00, 12.17batch/s]

Hit@10 : 0.959 		 Filt. Hit@10 : 0.919
Mean Rank : 92696475598848 	 Filt. Mean Rank : 185392951197696
MRR : inf 		 Filt. MRR : inf





In [12]:
# Mean Rank (MR), and Mean Reciprocal Rank (MRR) are off here. Will need to investigate farther.
# Hit@10 are reasonably close to values outlined in paper: 
# https://papers.nips.cc/paper_files/paper/2013/file/1cecc7a77928ca8133fa24680a88d2f9-Paper.pdf
# Possible optimizations:
# - adjust dissimilarity_type in translation model