In [1]:
from torch import cuda
from torch.optim import Adam

import pandas as pd
import networkx as nx
import torchkge

from torchkge.models import TransEModel
from torchkge.sampling import BernoulliNegativeSampler
from torchkge.utils import MarginLoss, DataLoader

from tqdm.autonotebook import tqdm

In [2]:
# Load the tab-separated triples file into a DataFrame.
df = pd.read_csv('attributes.tsv', delimiter='\t')

In [3]:
# Rename the three columns to 'from', 'to' and 'rel', the column names
# TorchKGE's KnowledgeGraph constructor looks for in the frame it is given.
df.columns = ['from', 'to', 'rel']

In [4]:
# Preview the renamed triples; pandas abbreviates the rendering of a frame
# this large to its first and last rows.
df

Unnamed: 0,from,to,rel
0,0,502600,0
1,0,437670,1
2,0,451979,2
3,0,312063,3
4,0,324688,3
...,...,...,...
3371608,297699,464443,9
3371609,297699,359947,131
3371610,297699,537997,77
3371611,297699,359947,128


In [5]:
# Build the TorchKGE knowledge graph from the ('from', 'to', 'rel') triples.
kg = torchkge.data_structures.KnowledgeGraph(df=df)

In [6]:
%%time
# Split the knowledge graph into a training graph and a test graph.
# NOTE(review): share=0.98 is presumably the fraction of facts kept for
# training (leaving ~2% for testing) -- confirm against the TorchKGE
# `split_kg` documentation.
kg_train, kg_test = kg.split_kg(share=0.98)

CPU times: user 1min 16s, sys: 370 ms, total: 1min 16s
Wall time: 34.8 s


In [7]:
# Train a TransE embedding model on the training split of the knowledge graph.

# Hyper-parameters for training
emb_dim = 100      # embedding dimension passed to TransEModel
lr = 0.01          # learning rate of the Adam optimizer
n_epochs = 100     # number of passes over the training facts
b_size = 272560    # number of facts per training batch
margin = 0.5       # margin used by the MarginLoss criterion

# Define the model and criterion
model = TransEModel(emb_dim, kg_train.n_ent, kg_train.n_rel, dissimilarity_type='L2')
criterion = MarginLoss(margin)

# Move everything to CUDA if available.
# NOTE(review): the recorded output of this cell shows PyTorch warning that the
# installed GPU is too old to be supported; `cuda.is_available()` may still
# return True in that situation -- verify the device is actually usable.
if cuda.is_available():
    cuda.empty_cache()
    model.cuda()
    criterion.cuda()
    use_cuda = 'all'
else:
    # The "no CUDA" value documented for torchkge's DataLoader is the None
    # object (the other accepted values being 'all' and 'batch'), not the
    # string 'None'.
    use_cuda = None

# Define the torch optimizer to be used
optimizer = Adam(model.parameters(), lr=lr)

# Negative sampler and batch loader over the training facts
sampler = BernoulliNegativeSampler(kg_train)
dataloader = DataLoader(kg_train, batch_size=b_size, use_cuda=use_cuda)

iterator = tqdm(range(n_epochs), unit='epoch')
for epoch in iterator:
    running_loss = 0.0
    for batch in dataloader:
        # Each batch holds the head, tail and relation indices of true facts.
        h, t, r = batch[0], batch[1], batch[2]
        # Corrupt the true facts to obtain negative heads / tails.
        n_h, n_t = sampler.corrupt_batch(h, t, r)

        optimizer.zero_grad()

        # forward + backward + optimize
        pos, neg = model(h, t, n_h, n_t, r)
        loss = criterion(pos, neg)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Show the mean per-batch loss of this epoch on the progress bar.
    iterator.set_description(
        'Epoch {} | mean loss: {:.5f}'.format(epoch + 1,
                                              running_loss / len(dataloader)))

# Normalize the model parameters once training has finished.
model.normalize_parameters()

    Found GPU0 GeForce GTX 760 which is of cuda capability 3.0.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability that we support is 3.5.
    


HBox(children=(FloatProgress(value=0.0), HTML(value='')))




In [8]:
from torchkge.evaluation import LinkPredictionEvaluator

In [10]:
# Evaluate the trained model on link prediction over the test graph.
evaluator = LinkPredictionEvaluator(model, kg_test)
# NOTE(review): b_size=2 gives a very large number of evaluation batches (see
# the progress bar below); presumably chosen to limit memory use -- confirm
# before raising it. k_max=10 matches the Hit@10 figures reported below.
evaluator.evaluate(b_size=2, k_max=10)
# Print Hit@10, mean rank and MRR, each in raw and filtered form.
evaluator.print_results()

HBox(children=(FloatProgress(value=0.0, description='Link prediction evaluation', max=32557.0, style=ProgressS…


Hit@10 : 0.241 		 Filt. Hit@10 : 0.252
Mean Rank : 44420 	 Filt. Mean Rank : 37063
MRR : 0.15 		 Filt. MRR : 0.155
