In [3]:
class Data:
    """Knowledge-graph triples read from ``train.txt``, ``valid.txt`` and ``test.txt``.

    Each non-blank line of a split file is split on whitespace into a triple
    ``[head, relation, tail]``.

    Args:
        data_dir: directory containing the three split files. A trailing
            path separator is optional.
        reverse: when true, every triple ``[h, r, t]`` is additionally stored
            as ``[t, r + "_reverse", h]``.

    Attributes:
        train_data, valid_data, test_data: lists of triples per split.
        data: concatenation of the three splits.
        entities: sorted unique heads and tails over ``data``.
        relations: sorted unique relations over ``data``.
        train_relations, valid_relations, test_relations: sorted unique
            relations per split.
    """

    def __init__(self, data_dir="data/FB15k-237/", reverse=False):
        self.train_data = self.load_data(data_dir, "train", reverse=reverse)
        self.valid_data = self.load_data(data_dir, "valid", reverse=reverse)
        self.test_data = self.load_data(data_dir, "test", reverse=reverse)
        self.data = self.train_data + self.valid_data + self.test_data
        self.entities = self.get_entities(self.data)
        self.relations = self.get_relations(self.data)
        self.train_relations = self.get_relations(self.train_data)
        self.valid_relations = self.get_relations(self.valid_data)
        self.test_relations = self.get_relations(self.test_data)

    @staticmethod
    def load_data(data_dir, data_type="train", reverse=False):
        """Read ``<data_dir>/<data_type>.txt`` and return its triples.

        Args:
            data_dir: directory holding the split file.
            data_type: split name; the file opened is ``<data_type>.txt``.
            reverse: append a reversed copy of every triple (see class docs).

        Returns:
            A list of token lists, one per non-blank line. An empty file
            yields an empty list.
        """
        # Local import so the class works regardless of which cell ran first.
        import os

        # os.path.join copes with data_dir given with or without a trailing separator.
        path = os.path.join(data_dir, "%s.txt" % data_type)
        with open(path, "r") as f:
            # Blank lines are skipped: they would otherwise become empty
            # triples and break the index-based accesses below.
            data = [line.split() for line in f if line.strip()]
        if reverse:
            data += [[row[2], row[1] + "_reverse", row[0]] for row in data]
        return data

    @staticmethod
    def get_relations(data):
        """Return the sorted list of distinct relations (second column) in ``data``."""
        return sorted({triple[1] for triple in data})

    @staticmethod
    def get_entities(data):
        """Return the sorted list of distinct heads and tails (first and third column)."""
        heads = {triple[0] for triple in data}
        tails = {triple[2] for triple in data}
        return sorted(heads | tails)


In [5]:
! pip3 install tucker_riemopt

Collecting tucker_riemopt
  Downloading tucker_riemopt-1.0.1-py3-none-any.whl.metadata (3.3 kB)
Downloading tucker_riemopt-1.0.1-py3-none-any.whl (30 kB)
Installing collected packages: tucker_riemopt
Successfully installed tucker_riemopt-1.0.1


In [4]:
import torch
import numpy as np
from torch import nn
from torch.nn.init import xavier_normal_
from tucker_riemopt import SFTucker

from torch.optim import Optimizer
from tucker_riemopt import SFTuckerRiemannian

In [6]:
class SFTuckER:
    """Tucker-style scorer with one entity factor shared by subjects and objects.

    Holds three plain tensors: the entity factor ``E`` (entities x d1), the
    relation factor ``R`` (relations x d2) and the core ``W`` (d2 x d1 x d1).

    Args:
        d: dataset object; only ``len(d.entities)`` and ``len(d.relations)``
            are read.
        d1: entity embedding width.
        d2: relation embedding width.
    """

    def __init__(self, d, d1, d2):
        if torch.cuda.is_available():
            device = torch.device('cuda:0')
        else:
            device = torch.device('cpu')

        core_shape = (d2, d1, d1)
        n_entities = len(d.entities)
        n_relations = len(d.relations)

        self.rank = core_shape
        # Draw order (E, then R, then the numpy-generated core) is kept fixed
        # so seeded runs produce the same initial values.
        self.E = torch.rand((n_entities, d1), device=device)
        self.R = torch.rand((n_relations, d2), device=device)
        core_values = np.random.uniform(-1, 1, core_shape)
        self.W = torch.tensor(core_values, dtype=torch.float, device=device)

    def parameters(self):
        """Return a new ``nn.ParameterList`` built from ``[W, E, R]`` (in that order)."""
        ordered = [self.W, self.E, self.R]
        return nn.ParameterList(ordered)

    def init(self):
        """Re-initialise ``E`` and ``R`` in place with Xavier-normal values."""
        for factor in (self.E, self.R):
            xavier_normal_(factor.data)

    def forward(self, e_idx, r_idx):
        """Score every entity as the object of each (subject, relation) pair.

        Args:
            e_idx: indices of subject entities (rows of ``E``).
            r_idx: indices of relations (rows of ``R``), aligned with ``e_idx``.

        Returns:
            Sigmoid scores with one row per pair and one column per entity.
        """
        rel_vecs = self.R[r_idx, :]
        subj_vecs = self.E[e_idx, :]
        width = subj_vecs.shape[1]

        # Contract the core with each relation vector: one (d1 x d1) matrix per pair.
        per_pair = torch.einsum("abc,da->dbc", self.W, rel_vecs)
        # Left-multiply by the subject vector, then drop the singleton row axis.
        hidden = torch.bmm(subj_vecs.view(-1, 1, width), per_pair)
        hidden = hidden.view(-1, width)
        scores = torch.matmul(hidden, self.E.T)
        return torch.sigmoid(scores)


class RGD(Optimizer):
    """Riemannian gradient descent over an SFTucker point (core, factor, shared factor).

    Usage is two-phase: :meth:`fit` evaluates the loss and stores a search
    direction, then :meth:`step` moves the parameters along it.

    Args:
        model_parameters: iterable yielding exactly three parameters in the
            order ``W, E, R`` (the order :meth:`step` unpacks them in).
        rank: rank passed to ``round`` when the updated point is truncated.
        max_lr: initial learning rate; stored as ``param_groups[0]["lr"]``,
            which is the value :meth:`step` actually reads.
    """

    def __init__(self, model_parameters, rank, max_lr):
        self.rank = rank
        self.max_lr = max_lr
        self.lr = max_lr
        self.direction = None  # tangent vector set by fit()
        self.loss = None       # loss value returned by the last fit()

        defaults = dict(rank=rank, max_lr=self.max_lr, lr=self.lr)
        params = model_parameters
        super().__init__(params, defaults)

    def fit(self, loss_fn, model, normalize_grad=False):
        """Compute the Riemannian gradient at the model's current point.

        Args:
            loss_fn: callable taking an ``SFTucker`` and returning a scalar loss.
            model: object exposing tensors ``W``, ``R`` and ``E``.
            normalize_grad: if truthy, the stored direction is the gradient
                rescaled to have this norm; otherwise the raw gradient is used.

        Returns:
            The (detached) norm of the Riemannian gradient.
        """
        x_k = SFTucker(model.W.data, [model.R.data], num_shared_factors=2, shared_factor=model.E.data)
        rgrad, self.loss = SFTuckerRiemannian.grad(loss_fn, x_k)
        rgrad_norm = rgrad.norm().detach()

        if normalize_grad:
            normalizer = normalize_grad / rgrad_norm
        else:
            normalizer = 1

        self.direction = normalizer * rgrad
        return rgrad_norm

    @torch.no_grad()
    def step(self, closure=None):
        """Move the parameters along ``-lr * direction`` and truncate to ``self.rank``.

        Args:
            closure: accepted for signature compatibility with
                ``torch.optim.Optimizer.step`` and ``SFTuckerAdam.step``;
                it is not called. Use :meth:`fit` to evaluate the loss.

        Raises:
            RuntimeError: if :meth:`fit` has not produced a direction yet.
        """
        if self.direction is None:
            raise RuntimeError("RGD.step() called before RGD.fit(): no search direction available")

        W, E, R = self.param_groups[0]["params"]

        x_k = self.direction.point
        # Tangent-space update followed by construct()/round(); presumably this
        # is the retraction back to rank `self.rank` -- see tucker_riemopt docs.
        x_k = (-self.param_groups[0]["lr"]) * self.direction + SFTuckerRiemannian.TangentVector(x_k)
        x_k = x_k.construct().round(self.rank)

        # Overwrite in place: the model's tensors share storage with these
        # parameters. copy_ stores the new values exactly, whereas
        # `p.add_(new - p)` is subject to floating-point cancellation.
        W.data.copy_(x_k.core)
        R.data.copy_(x_k.regular_factors[0])
        E.data.copy_(x_k.shared_factor)



class SFTuckerAdam(RGD):
    """Adam-style variant of ``RGD``: momentum on the Riemannian gradient plus a
    scalar running average of the squared gradient norm.

    Args:
        params: iterable yielding the three parameters in the order ``W, E, R``.
        rank: rank passed to ``round`` when the updated point is truncated.
        max_lr: initial learning rate (stored in ``param_groups[0]["lr"]``).
        betas: decay coefficients for the first and second moment.
        eps: constant added to the denominator of the update.
        step_velocity: divisor applied to the step counter inside the
            bias-correction exponents.
    """

    def __init__(self, params, rank, max_lr, betas=(0.9, 0.999), eps=1e-8, step_velocity=1):
        super().__init__(params, rank, max_lr)
        self.betas = betas
        self.eps = eps
        self.step_velocity = step_velocity

        self.momentum = None
        # Allocate the second moment on the parameters' device instead of a
        # hard-coded "cuda", so the optimizer also works on CPU-only machines.
        param_device = self.param_groups[0]["params"][0].device
        self.second_momentum = torch.zeros(1, device=param_device)

        self.step_t = 1

    def fit(self, loss_fn, model, normalize_grad = 1.):
        """Compute the gradient, update both moments and store the search direction.

        Args:
            loss_fn: callable taking an ``SFTucker`` and returning a scalar loss.
            model: object exposing tensors ``W``, ``R`` and ``E``.
            normalize_grad: not used by this optimizer; kept so the signature
                matches ``RGD.fit``.

        Returns:
            The (detached) norm of the Riemannian gradient.
        """
        x_k = SFTucker(model.W.data, [model.R.data], num_shared_factors=2, shared_factor=model.E.data)
        rgrad, self.loss = SFTuckerRiemannian.grad(loss_fn, x_k)
        rgrad_norm = rgrad.norm().detach()
        if self.momentum is not None:
            # The previous momentum is re-projected at the current point
            # before being mixed with the new gradient.
            self.momentum = SFTuckerRiemannian.project(x_k, self.momentum.construct())
            self.momentum = self.betas[0] * self.momentum + (1 - self.betas[0]) * rgrad
        else:
            self.momentum = (1 - self.betas[0]) * rgrad
        self.second_momentum = self.betas[1] * self.second_momentum + (1 - self.betas[1]) * rgrad_norm ** 2
        # NOTE(review): step_t starts at 1, so with step_velocity=1 the first
        # exponent used here is 2 -- confirm this offset is intended.
        second_momentum_corrected = self.second_momentum / (1 - self.betas[1] ** (self.step_t // self.step_velocity + 1))
        bias_correction_ratio = (1 - self.betas[0] ** (self.step_t // self.step_velocity + 1)) * torch.sqrt(
            second_momentum_corrected
        ) + self.eps
        self.direction = (1 / bias_correction_ratio) * self.momentum
        return rgrad_norm

    @torch.no_grad()
    def step(self, closure=None):
        """Move the parameters along ``-lr * direction``, truncate to ``self.rank``
        and advance the step counter.

        Args:
            closure: accepted for signature compatibility with
                ``torch.optim.Optimizer.step``; it is not called.

        Raises:
            RuntimeError: if :meth:`fit` has not produced a direction yet.
        """
        if self.direction is None:
            raise RuntimeError("SFTuckerAdam.step() called before fit(): no search direction available")

        W, E, R = self.param_groups[0]["params"]

        x_k = self.direction.point
        x_k = (-self.param_groups[0]["lr"]) * self.direction + SFTuckerRiemannian.TangentVector(x_k)
        x_k = x_k.construct().round(self.rank)

        # Exact in-place overwrite; `p.add_(new - p)` would be subject to
        # floating-point cancellation.
        W.data.copy_(x_k.core)
        R.data.copy_(x_k.regular_factors[0])
        E.data.copy_(x_k.shared_factor)

        self.step_t += 1



In [7]:
import numpy as np
import torch
import time
from collections import defaultdict
from torch.optim.lr_scheduler import ExponentialLR
import argparse


def get_loss_fn(e_idx, r_idx, targets, criterion):
    """Build a loss closure over one batch, evaluated on an ``SFTucker`` point.

    Args:
        e_idx: indices of subject entities (rows of the shared factor).
        r_idx: indices of relations (rows of the first regular factor).
        targets: target tensor handed to ``criterion`` unchanged.
        criterion: callable ``criterion(predictions, targets)`` returning the loss.

    Returns:
        A function ``loss_fn(T)`` that scores every entity for each
        (subject, relation) pair using ``T``'s core and factors, applies a
        sigmoid, and returns ``criterion(scores, targets)``.
    """

    def loss_fn(T: SFTucker):
        entity_factor = T.shared_factor
        rel_vecs = T.regular_factors[0][r_idx, :]
        subj_vecs = entity_factor[e_idx, :]
        width = subj_vecs.shape[1]

        # One (width x width) matrix per pair, obtained by contracting the
        # core with the relation vector.
        per_pair = torch.einsum("abc,da->dbc", T.core, rel_vecs)
        hidden = torch.bmm(subj_vecs.view(-1, 1, width), per_pair)
        hidden = hidden.view(-1, width)
        scores = torch.matmul(hidden, T.shared_factor.T)
        return criterion(torch.sigmoid(scores), targets)

    return loss_fn


class Experiment:
    """Trains an ``SFTuckER`` model with ``RGD`` and reports link-prediction metrics.

    Args:
        learning_rate: learning rate handed to the optimizer.
        ent_vec_dim: entity embedding width.
        rel_vec_dim: relation embedding width.
        num_iterations: number of training epochs.
        batch_size: number of (entity, relation) pairs or triples per batch.
        decay_rate: gamma for ``ExponentialLR``; a falsy value disables the scheduler.
        label_smoothing: smoothing coefficient applied to the 0/1 targets.
        data: optional dataset object exposing ``entities``, ``relations``,
            ``data``, ``train_data`` and ``valid_data``. When omitted, the
            notebook-level global ``d`` is used (the previous behaviour).
    """

    def __init__(self, learning_rate=0.0005, ent_vec_dim=200, rel_vec_dim=200,
                 num_iterations=500, batch_size=1024, decay_rate=0., label_smoothing=0.,
                 data=None):
        self.learning_rate = learning_rate
        self.ent_vec_dim = ent_vec_dim
        self.rel_vec_dim = rel_vec_dim
        self.num_iterations = num_iterations
        self.batch_size = batch_size
        self.decay_rate = decay_rate
        self.label_smoothing = label_smoothing
        self.dataset = data
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.criterion = torch.nn.BCELoss()

    def _get_dataset(self):
        """Return the dataset given at construction, else the global ``d``."""
        dataset = getattr(self, "dataset", None)
        if dataset is None:
            # Backward-compatible fallback: the driver cell binds the loaded
            # data to a module-level name `d`.
            dataset = d
        return dataset

    def get_data_idxs(self, data):
        """Map string triples to ``(entity_idx, relation_idx, entity_idx)`` tuples.

        Requires ``self.entity_idxs`` / ``self.relation_idxs``, which
        :meth:`train_and_eval` sets.
        """
        return [
            (self.entity_idxs[triple[0]], self.relation_idxs[triple[1]], self.entity_idxs[triple[2]])
            for triple in data
        ]

    def get_er_vocab(self, data):
        """Group index triples into ``{(entity, relation): [object, ...]}``."""
        er_vocab = defaultdict(list)
        for triple in data:
            er_vocab[(triple[0], triple[1])].append(triple[2])
        return er_vocab

    def get_batch(self, er_vocab, er_vocab_pairs, idx):
        """Slice one batch and build its multi-hot target matrix.

        Args:
            er_vocab: mapping ``(entity, relation) -> list of object indices``.
            er_vocab_pairs: sequence of ``(entity, relation)`` pairs or of
                ``(entity, relation, object)`` triples; only the first two
                elements of each item are used for the lookup.
            idx: start offset of the batch within ``er_vocab_pairs``.

        Returns:
            ``(batch, targets)``: the batch as a numpy array and a float
            tensor on ``self.device`` with one row per item and one column
            per entity, set to 1 at every known object.
        """
        batch = er_vocab_pairs[idx:idx + self.batch_size]
        # len(self.entity_idxs) equals the number of distinct entities, so no
        # global dataset lookup is needed here.
        targets = np.zeros((len(batch), len(self.entity_idxs)))
        for row, item in enumerate(batch):
            # Key on (entity, relation) explicitly: indexing with a whole
            # triple never matches and would leave the row all zeros.
            # .get() avoids inserting new keys into a defaultdict.
            targets[row, er_vocab.get((item[0], item[1]), [])] = 1.
        targets = torch.FloatTensor(targets).to(self.device)
        return np.array(batch), targets

    def evaluate(self, model, data):
        """Print and return loss and filtered ranking metrics for ``data``.

        For each triple, all other known objects of the same
        (entity, relation) pair are zeroed out before ranking.

        Args:
            model: object with ``forward(e_idx, r_idx)`` returning per-entity scores.
            data: string triples to evaluate; requires the index maps set by
                :meth:`train_and_eval`.

        Returns:
            dict with keys ``"val_loss"``, ``"hits@10"``, ``"hits@3"``,
            ``"hits@1"`` and ``"mrr"``.
        """
        dataset = self._get_dataset()
        hits = [[] for _ in range(10)]
        ranks = []

        test_data_idxs = self.get_data_idxs(data)
        # Filter vocabulary is built over every split of the dataset.
        er_vocab = self.get_er_vocab(self.get_data_idxs(dataset.data))

        losses = []
        np.random.shuffle(test_data_idxs)
        for i in range(0, len(test_data_idxs), self.batch_size):
            data_batch, targets = self.get_batch(er_vocab, test_data_idxs, i)
            e1_idx = torch.tensor(data_batch[:, 0]).to(self.device)
            r_idx = torch.tensor(data_batch[:, 1]).to(self.device)
            e2_idx = torch.tensor(data_batch[:, 2]).to(self.device)

            targets = ((1.0 - self.label_smoothing) * targets) + (1.0 / targets.size(1))

            predictions = model.forward(e1_idx, r_idx)

            losses.append(self.criterion(predictions, targets).item())

            for j in range(data_batch.shape[0]):
                filt = er_vocab[(int(data_batch[j][0]), int(data_batch[j][1]))]
                # Keep the score of the triple under test, suppress the
                # other known objects.
                target_value = predictions[j, e2_idx[j]].item()
                predictions[j, filt] = 0.0
                predictions[j, e2_idx[j]] = target_value

            sort_values, sort_idxs = torch.sort(predictions, dim=1, descending=True)

            sort_idxs = sort_idxs.cpu().numpy()
            for j in range(data_batch.shape[0]):
                rank = np.where(sort_idxs[j] == e2_idx[j].item())[0][0]
                ranks.append(rank + 1)

                for hits_level in range(10):
                    hits[hits_level].append(1.0 if rank <= hits_level else 0.0)

        metrics = {
            "val_loss": np.mean(losses),
            "hits@10": np.mean(hits[9]),
            "hits@3": np.mean(hits[2]),
            "hits@1": np.mean(hits[0]),
            "mrr": np.mean(1. / np.array(ranks)),
        }
        print('val_loss:', metrics["val_loss"])
        print('Hits @10: {0}'.format(metrics["hits@10"]))
        print('Hits @3: {0}'.format(metrics["hits@3"]))
        print('Hits @1: {0}'.format(metrics["hits@1"]))
        print('Mean reciprocal rank: {0}'.format(metrics["mrr"]))
        return metrics

    def train_and_eval(self):
        """Train for ``num_iterations`` epochs, evaluating on the validation split after each."""
        dataset = self._get_dataset()
        print("Training the TuckER model...")
        self.entity_idxs = {entity: i for i, entity in enumerate(dataset.entities)}
        self.relation_idxs = {relation: i for i, relation in enumerate(dataset.relations)}

        train_data_idxs = self.get_data_idxs(dataset.train_data)
        print("Number of training data points: %d" % len(train_data_idxs))

        model = SFTuckER(dataset, self.ent_vec_dim, self.rel_vec_dim)

        model.init()

        opt = RGD(model.parameters(), (self.rel_vec_dim, self.ent_vec_dim, self.ent_vec_dim), self.learning_rate)
        scheduler = ExponentialLR(opt, self.decay_rate) if self.decay_rate else None

        er_vocab = self.get_er_vocab(train_data_idxs)
        er_vocab_pairs = list(er_vocab.keys())

        print("Starting training...")
        for it in range(1, self.num_iterations + 1):
            print('Epoch:', it)
            losses = []
            np.random.shuffle(er_vocab_pairs)
            for j in range(0, len(er_vocab_pairs), self.batch_size):
                data_batch, targets = self.get_batch(er_vocab, er_vocab_pairs, j)
                opt.zero_grad()
                e1_idx = torch.tensor(data_batch[:, 0]).to(self.device)
                r_idx = torch.tensor(data_batch[:, 1]).to(self.device)

                targets = ((1.0 - self.label_smoothing) * targets) + (1.0 / targets.size(1))

                # fit() evaluates the loss and stores the direction; step() applies it.
                loss_fn = get_loss_fn(e1_idx, r_idx, targets, self.criterion)
                opt.fit(loss_fn, model)
                opt.step()
                opt.zero_grad(set_to_none=True)

                loss = opt.loss.detach()
                print(j / self.batch_size, loss)

                losses.append(loss.item())
            if scheduler is not None:
                scheduler.step()
            print(np.mean(losses))
            # Only the validation split is evaluated per epoch.
            with torch.no_grad():
                print("Validation:")
                self.evaluate(model, dataset.valid_data)

In [9]:
# --- Experiment configuration -------------------------------------------
dataset = "FB15k-237"      # sub-directory of data/ holding train/valid/test .txt
num_iterations = 500       # number of training epochs
batch_size = 64
# NOTE(review): this learning rate is extremely large, and the losses recorded
# in this cell's output hover around 0.69 and then rise -- confirm the value.
lr = 1e9
dr = 1.0                   # passed as decay_rate (ExponentialLR gamma in Experiment)
edim = 200                 # passed as ent_vec_dim
rdim = 200                 # passed as rel_vec_dim
label_smoothing = 0.1

data_dir = "data/%s/" % dataset
# --- Reproducibility: seed numpy and torch (CPU and, if present, CUDA) ---
torch.backends.cudnn.deterministic = True
seed = 20
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
# The name `d` matters: Experiment's methods read it as a global.
# reverse=True also loads a "<relation>_reverse" copy of every triple.
d = Data(data_dir=data_dir, reverse=True)
experiment = Experiment(num_iterations=num_iterations, batch_size=batch_size, learning_rate=lr,
                        decay_rate=dr, ent_vec_dim=edim, rel_vec_dim=rdim, label_smoothing=label_smoothing)
# Runs the full training loop with per-epoch validation (long-running).
experiment.train_and_eval()

Training the TuckER model...
Number of training data points: 544230
Starting training...
Epoch: 1
0.0 tensor(0.6932)
1.0 tensor(0.6944)
2.0 tensor(0.6862)
3.0 tensor(0.6973)
4.0 tensor(0.6869)
5.0 tensor(0.7258)


KeyboardInterrupt: 