Write implementation on PyTorch for TransE model (you can use TorchGeometric or DGL library for working with graphs) and train your model on WordNet18RR dataset (you can use loaded dataset from any graph library).

As a result, you must provide a link to github (or gitlab) with all the source code.
The readability of the code, the presence of comments, type annotations, and the quality of the code as a whole will be taken into account when checking the test case.

### Imports and helper Functions

In [17]:
import os
import numpy as np
import matplotlib.pyplot as plt
from typing import List, Union, Callable, Optional, Tuple
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch_geometric.datasets import WordNet18RR, WordNet18
from torch_geometric.loader import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

# needed 
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# check if CUDA device is available
torch.cuda.is_available()

True

Both versions of the WordNet18 dataset have been imported through the PyTorch Geometric library. We highlight that training/testing on the original WN18 was done solely to compare the implemented model best performance against the results from **Borders et al.** (table 3 page 6).

In [2]:
# download wordnet dataset
WordNet18('./WordNet18/')
WordNet18RR('./WordNet18RR/')

WordNet18RR()

### Custom Dataset and DataLoader

Here we define a custom dataset that reads from the raw directory of the WordNet18 downloaded in the previous section.

In [14]:
class Edge():
    def __init__(self, head, tail, rel) -> None:
        self.head = head
        self.tail = tail
        self.rel = rel

    def __str__(self) -> str:
        return f"{self.head} {self.rel} {self.tail}"

def process_lines(lines: List[str], delim: str='\t'):
    """ cleans up the input set of strings """
    return list(map(lambda s: s.strip('\n').split(delim), lines))

def load_edges_from_file(path: str, is_wn18: bool=True):
    """ read edges from the text file in raw, considering the different
        formats of RR (head, rel, tail) and original version (head, tail, rel) """
    edge_list = list()

    lines = open(path).readlines()

    # WN18 contains a header line and has a format (head, tail, rel)
    if is_wn18:
        lines = lines[1:]
        delim = ' '
    else:
        delim = '\t'

    lines = process_lines(lines, delim=delim)

    # the two WN version have a different format to represent edges/relation
    if is_wn18:
        edge_list = [Edge(head=head, tail=tail, rel=rel) for head, tail, rel in lines]
    else:
        edge_list = [Edge(head=head, tail=tail, rel=rel) for head, rel, tail in lines]
    
    return edge_list

def load_ids_dict(path: str) -> Union[dict, dict]:
    """ reads and return the dictionaries entity->id and relation->id 
        from the specified location """
    
    assert(os.path.exists(path))

    entity2id = process_lines(open(os.path.join(path, "entity2id.txt")))
    relation2id = process_lines(open(os.path.join(path, "relation2id.txt")))

    entity2id = dict([(x[0], int(x[1])) for x in entity2id])
    relation2id = dict([(x[0], int(x[1])) for x in relation2id])

    return entity2id, relation2id

def create_id_mappings(dataset_str: str="WordNet18RR") -> None:
    """ creates the mapping ids inside the raw directory of the 
        specified version of WordNet18 """

    assert(dataset_str in ["WordNet18", "WordNet18RR"])

    is_wn18 = dataset_str == "WordNet18"
    path = f"./{dataset_str}/raw/"

    if not os.path.exists(path):
        print(f"Directory {path} does not exist")
        return
    
    # read edge_list from the raw text files
    train_edge_list = load_edges_from_file(os.path.join(path, "train.txt"), is_wn18=is_wn18)
    val_edge_list = load_edges_from_file(os.path.join(path, "valid.txt"), is_wn18=is_wn18)
    test_edge_list = load_edges_from_file(os.path.join(path, "test.txt"), is_wn18=is_wn18)

    entity_list = list()
    relation_list = list()

    # assign unique id to each entity/relation
    for edge_list in [train_edge_list, val_edge_list, test_edge_list]:
        entity_list += [x.head for x in edge_list] + [x.tail for x in edge_list]
        relation_list += [x.rel for x in edge_list]

    entity_list = sorted(list(set(entity_list)))
    entity2id = dict(zip(entity_list, range(len(entity_list))))

    relation_list = sorted(list(set(relation_list)))
    relation2id = dict(zip(relation_list, range(len(relation_list))))

    # save the generated mappings into the raw directory
    with open(os.path.join(path, "entity2id.txt"), "w") as f:
        f.writelines([f"{x}\t{y}\n" for x,y in entity2id.items()])

    with open(os.path.join(path, "relation2id.txt"), "w") as f:
        f.writelines([f"{x}\t{y}\n" for x,y in relation2id.items()])

To generate the mappings we simply call the *create_id_mappings()* function specifying the WN version we want to work with.

In [15]:
create_id_mappings("WordNet18RR")

Next we define a PyTorch dataset and a Data Module that can be handled by PyTorch Lightning.

In [16]:
class WordNetDataset(Dataset):
    def __init__(self, dataset: str="WordNet18RR", split="train") -> None:
        super().__init__()
        self.path = f"./{dataset}/raw"

        if split == 'val':
            split = 'valid'
        self.split = split

        is_wn18 = dataset == "WordNet18"

        edge_list = load_edges_from_file(os.path.join(self.path, f"{self.split}.txt"), is_wn18=is_wn18)
        entity2id, relation2id = load_ids_dict(path=self.path)

        self.edge_list = torch.tensor([(entity2id[e.head], entity2id[e.tail]) for e in edge_list])
        self.relation_list = torch.tensor([relation2id[e.rel] for e in edge_list])
    
    def __len__(self):
        return self.edge_list.shape[0]

    def __getitem__(self, index) -> Tuple[int,int]:
        return self.edge_list[index], self.relation_list[index]

class WordNetDataModule(pl.LightningDataModule):
    def __init__(self, dataset: str="WordNet18RR", batch_size=32) -> None:
        super().__init__()
        self.path = f"./{dataset}/raw"
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_entities = 40943

        if dataset == 'WordNet18RR':
            self.num_relations = 11
        else:
            self.num_relations = 18
        
        self.params = {"pin_memory": True, "batch_size": batch_size}

    def setup(self, stage: str):
        if stage == "fit":
            self.train_dataset = WordNetDataset(dataset=self.dataset, split="train")
            self.val_dataset = WordNetDataset(dataset=self.dataset, split="valid")
        
        if stage == "predict":
            self.test_dataset = WordNetDataset(dataset=self.dataset, split="test")
    
    def train_dataloader(self):
        return DataLoader(self.train_dataset, shuffle=True, **self.params)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, shuffle=False, **self.params)

    def predict_dataloader(self):
        return DataLoader(self.test_dataset, shuffle=False, **self.params)


### Lightning Model

In [95]:
class TransE(pl.LightningModule):
    def __init__(self, margin: int=1, emb_dim: int=20, learning_rate=0.01, p_norm=1, dataset="WordNet18RR") -> None:
        """ Instatiate the entity and relation matrix of the TransE model
            https://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data

        Args:
            n_entities (int): _description_
            n_relations (int): _description_
            margin (int, optional): _description_. Defaults to 1.
            emb_dim (int, optional): _description_. Defaults to 50.
        """
        super().__init__()
        self.margin = margin
        self.emb_dim = emb_dim
        self.learning_rate = learning_rate
        self.p_norm = p_norm

        # dataset specific values
        self.num_entities = 40943

        if dataset == "WordNet18":
            self.num_relations = 18
        else:
            self.num_relations = 11

        # initialize embeddings
        self.entity_mat = nn.Embedding(self.num_entities, emb_dim).to(self.device)
        self.relation_mat = nn.Embedding(self.num_relations, emb_dim).to(self.device)

        with torch.no_grad():
            # initialize with random uniform
            val = 6/np.sqrt(emb_dim)
            self.entity_mat.weight.uniform_(-val, val)
            self.relation_mat.weight.uniform_(-val, val)

            # normalize entity and relation embeddings
            self.entity_mat.weight.copy_(F.normalize(self.entity_mat.weight, p=self.p_norm, dim=-1))
            self.relation_mat.weight.copy_(F.normalize(self.relation_mat.weight, p=self.p_norm, dim=-1))

    def corrupt_edge_list_OLDVERSION(self, edge_list: torch.Tensor) -> Union[torch.tensor, torch.tensor]:
        """ given a list of edges return two lists of edges where either head 
            or tail has been randomly replaced by any other entity """
        n = edge_list.shape[0]
        entity_list = range(self.num_entities)

        # sample random entity replacements
        r1 = np.random.choice(entity_list, size=n)
        r2 = np.random.choice(entity_list, size=n)

        corrupted_heads = edge_list.detach().clone()
        corrupted_tails = edge_list.detach().clone()

        corrupted_heads[:,0] = torch.from_numpy(r1)
        corrupted_tails[:,1] = torch.from_numpy(r2)
                
        return corrupted_heads, corrupted_tails

    def corrupt_edge_list(self, edge_list: torch.Tensor) -> Union[torch.tensor, torch.tensor]:
        """ given a list of edges return two lists of edges where either head 
            or tail has been randomly replaced by any other entity """
        n = edge_list.shape[0]
        entity_list = range(self.num_entities)

        heads = edge_list[:,0]
        tails = edge_list[:, 1]

        # sample random entity replacements
        sample = np.random.choice(entity_list, size=n)
        sample = torch.from_numpy(sample).type(torch.int64).to(self.device)

        # random selection of either head or tail
        pos = np.random.choice([0, 1], size=n)
        pos = torch.from_numpy(pos).type(torch.int64).to(self.device)
        pos = pos.reshape(-1,1)

        corrupted_heads = torch.vstack([heads, sample]).T
        corrupted_heads = corrupted_heads.gather(1, pos.reshape(-1,1))

        corrupted_tails = torch.vstack([tails, sample]).T
        corrupted_tails = corrupted_tails.gather(1, (1-pos).reshape(-1,1))

        corrupted_triplet = torch.hstack([corrupted_heads, corrupted_tails])
        corrupted_triplet
        
        return corrupted_triplet

    
    def embedding_loss(self, batch):
        edge_list, labels = batch
        
        loss = torch.zeros(1).to(self.device)

        corrupted_triplet = self.corrupt_edge_list(edge_list)
        #corrupted_heads, corrupted_tails = self.corrupt_edge_list(edge_list)
        
        # take embedding values for entities and relations
        """ t1 = self.entity_mat.weight[edge_list.repeat(2,1)]
        t2 = torch.vstack([self.entity_mat.weight[corrupted_heads],
                           self.entity_mat.weight[corrupted_tails]])
        rel = self.relation_mat.weight[labels].repeat(2,1) """

        t1 = self.entity_mat.weight[edge_list]
        t2 = self.entity_mat.weight[corrupted_triplet]
        rel = self.relation_mat.weight[labels]

        # normalize entity (maybe unnecessary here)
        t1 = F.normalize(t1, p=self.p_norm, dim=-1)
        t2 = F.normalize(t2, p=self.p_norm, dim=-1)

        # compute the loss value
        pos = torch.norm(t1[:,0,:] + rel - t1[:,1,:], dim=-1, p=self.p_norm)
        neg = torch.norm(t2[:,0,:] + rel - t2[:,1,:], dim=-1, p=self.p_norm)
        loss = torch.clip((self.margin + pos - neg), min=0).sum()
        
        """ target = torch.ones(pos.shape[0]).to(self.device)
        torch.set_grad_enabled(True)
        F.margin_ranking_loss(input1=pos, input2=neg, target=target, margin=self.margin) """

        return loss

    def evaluation_protocol(self, batch):
        edge_list, labels = batch
        batch_size = edge_list.shape[0]

        # combine heads, tails and labels
        triplets = torch.hstack([edge_list, labels.reshape(-1,1)])

        # repeat all triplets for n_entities times
        triplets = triplets[:,np.newaxis,:].repeat(1,self.num_entities,1)

        true_pos_total = list()
        rank_pos_list = list()

        # repeat corruption for both head and tail
        for pos in [0,1]:
            x = triplets.detach().clone()
            
            # replace all heads/tails with list of all possible entities
            x[:,:,pos] = torch.tensor(range(self.num_entities))[np.newaxis,:].repeat(batch_size,1).to(self.device)

            # triplets are arranged as (head, tail, label)
            head = self.entity_mat.weight[x[:,:,0]]
            tail = self.entity_mat.weight[x[:,:,1]]
            rel = self.relation_mat.weight[x[:,:,2]]

            # compute distance between head + label and tail
            norms = torch.norm(head + rel - tail, dim=-1, p=self.p_norm)

            # get index positions of sorted norms for each triplet
            rankings = torch.vstack([torch.argsort(x) for x in norms.unbind(dim=0)])

            # find position of heads within the rankings
            torch.save(rankings, "rankings.pt")
            torch.save(edge_list, "edge_list.pt")
            rank_pos = torch.where(rankings == edge_list[:,pos].reshape(-1,1))[1]

            rank_pos_list.append(rank_pos)
            true_pos_total.append(rank_pos < 10)

        return torch.vstack(rank_pos_list).flatten()

    def training_step(self, batch, batch_idx):
        return self.embedding_loss(batch)

    def validation_step(self, batch, batch_idx):
        loss = self.embedding_loss(batch)
        batch_rankings = self.evaluation_protocol(batch)
        self.log_dict({"val_loss": loss}, prog_bar=True, on_epoch=True)
        return {"val_loss": loss, "batch_rankings": batch_rankings}
    
    def predict_step(self, batch, batch_idx):
        batch_rankings = self.evaluation_protocol(batch)
        return {"batch_rankings": batch_rankings}

    def on_train_epoch_end(self):
        with torch.no_grad():
             # keep entities embeddings normalized
            self.entity_mat.weight.copy_(F.normalize(self.entity_mat.weight, p=2, dim=1))
    
    def compute_epoch_metrics(self, outputs, stage, log_value=True):
        epoch_rankings = torch.hstack([x['batch_rankings'] for x in outputs])
        mean_rank = epoch_rankings.float().float().mean()
        hit_at_10 = (epoch_rankings < 10).float().mean()*100
        if log_value:
            self.log_dict({f"{stage}_mean_rank": mean_rank,
                           f"{stage}_hits@10": hit_at_10},
                           prog_bar=True, on_epoch=True)
        else:
            return mean_rank, hit_at_10
    
    def validation_epoch_end(self, outputs):
        self.compute_epoch_metrics(outputs, stage="val")

    def prediction_epoch_end(self, outputs):
        self.compute_epoch_metrics(outputs, stage="predict")

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        

In [27]:
def train_transe(config,
                 max_epochs=100,
                 accelerator='gpu',
                 num_best_ckpt=3,
                 patience=10,
                 min_delta=0.5,
                 main_path='./',
                 dataset='WordNet18RR'):
    
    model = TransE(emb_dim=config['emb_dim'],
                learning_rate=config['lr'],
                margin=config['margin'],
                p_norm=config['p_norm'],
                dataset=dataset)

    dm = WordNetDataModule(batch_size=config['batch_size'], dataset=dataset)

    dir_path = f"ckpt_{dataset}/emb_dim={config['emb_dim']}-lr={config['lr']}-margin={config['margin']}-p_norm={config['p_norm']}"
    dir_path = os.path.join(main_path, dir_path)

    print(dir_path)

    # using mean predicted rank on validation set as described in section 4.2
    early_stop_rank = EarlyStopping(monitor="val_mean_rank",
                                    min_delta=min_delta,
                                    patience=patience,
                                    verbose=False,
                                    mode="min")

    # save best models based on mean rank on validation set
    checkpoint_callback = ModelCheckpoint(save_top_k=num_best_ckpt,
                                        monitor="val_mean_rank",
                                        dirpath=dir_path,
                                        filename="transe-{dataset}-{epoch}-{val_mean_rank:.0f}-{val_hits@10:.1f}")

    logger = TensorBoardLogger(f'tb_logs_{dataset}', name='TransE')

    trainer = pl.Trainer(max_epochs=max_epochs,
                        accelerator=accelerator,
                        callbacks=[checkpoint_callback, early_stop_rank],
                        logger=logger)

    try:
        # resume from best model if checkpoint is available
        ckpt_path = os.path.join(dir_path, os.listdir(dir_path)[-1])
    except:
        ckpt_path = None

    trainer.fit(model, datamodule=dm, ckpt_path=ckpt_path)


### Parameters search

In [7]:
# best model: emb_dim=40, lr=0.001, margin=1, p_norm=2

config = {
    "batch_size": [128],
    "lr": [0.001],
    "emb_dim": [40],
    "p_norm": [1],
    "margin": [1, 2]
}

num_epochs = 1000

import itertools

keys, values = zip(*config.items())
comb_list = [dict(zip(keys,v)) for v in itertools.product(*values)]

for comb in comb_list:
    try:
        train_transe(config=comb, max_epochs=num_epochs, dataset="WordNet18")
    except:
        pass

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 112: 100%|██████████| 1146/1146 [00:46<00:00, 24.83it/s, loss=9.71, v_num=27, val_loss=20.40, val_mean_rank=346.0, val_hits@10=53.10] 


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 73:  40%|████      | 462/1146 [00:08<00:12, 54.20it/s, loss=189, v_num=28, val_loss=242.0, val_mean_rank=1.73e+3, val_hits@10=28.90] 

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [8]:
# best model: emb_dim=40, lr=0.001, margin=1, p_norm=2

config = {
    "batch_size": [128],
    "lr": [0.1, 0.01],
    "emb_dim": [20, 40],
    "p_norm": [2, 1],
    "margin": [1, 2, 10]
}

num_epochs = 1000

import itertools

keys, values = zip(*config.items())
comb_list = [dict(zip(keys,v)) for v in itertools.product(*values)]

for comb in comb_list:
    try:
        train_transe(config=comb, max_epochs=num_epochs, dataset="WordNet18")
    except:
        pass

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 14: 100%|██████████| 1146/1146 [00:50<00:00, 22.85it/s, loss=165, v_num=29, val_loss=174.0, val_mean_rank=1.31e+4, val_hits@10=2.200]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



Epoch 73:  40%|████      | 462/1146 [12:56<19:09,  1.68s/it, loss=189, v_num=28, val_loss=242.0, val_mean_rank=1.73e+3, val_hits@10=28.90]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 11: 100%|██████████| 1146/1146 [00:49<00:00, 23.28it/s, loss=396, v_num=30, val_loss=416.0, val_mean_rank=1.69e+4, val_hits@10=1.100]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [00:49<00:00, 22.99it/s, loss=2.35e+03, v_num=31, val_loss=2.46e+3, val_mean_rank=1.66e+4, val_hits@10=1.130]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [00:38<00:00, 29.72it/s, loss=213, v_num=32, val_loss=228.0, val_mean_rank=2.05e+4, val_hits@10=0.090]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [00:40<00:00, 28.53it/s, loss=457, v_num=33, val_loss=477.0, val_mean_rank=2.19e+4, val_hits@10=0.100]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 16: 100%|██████████| 1146/1146 [00:48<00:00, 23.79it/s, loss=2.39e+03, v_num=34, val_loss=2.51e+3, val_mean_rank=2.11e+4, val_hits@10=0.040]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [00:52<00:00, 21.88it/s, loss=183, v_num=35, val_loss=193.0, val_mean_rank=1.5e+4, val_hits@10=1.090] 

GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [00:51<00:00, 22.27it/s, loss=409, v_num=36, val_loss=432.0, val_mean_rank=1.75e+4, val_hits@10=0.280]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [01:02<00:00, 18.30it/s, loss=2.35e+03, v_num=37, val_loss=2.47e+3, val_mean_rank=1.76e+4, val_hits@10=0.200]

GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [00:52<00:00, 21.64it/s, loss=217, v_num=38, val_loss=227.0, val_mean_rank=2.12e+4, val_hits@10=0.080]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [00:41<00:00, 27.90it/s, loss=448, v_num=39, val_loss=472.0, val_mean_rank=2.22e+4, val_hits@10=0.090]

GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [01:02<00:00, 18.34it/s, loss=2.39e+03, v_num=40, val_loss=2.52e+3, val_mean_rank=2.24e+4, val_hits@10=0.050]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 53: 100%|██████████| 1146/1146 [00:49<00:00, 23.21it/s, loss=44.1, v_num=41, val_loss=50.80, val_mean_rank=571.0, val_hits@10=27.00] 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 26: 100%|██████████| 1146/1146 [00:49<00:00, 23.38it/s, loss=269, v_num=42, val_loss=290.0, val_mean_rank=971.0, val_hits@10=20.80]  

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 27: 100%|██████████| 1146/1146 [00:48<00:00, 23.45it/s, loss=2.21e+03, v_num=43, val_loss=2.33e+3, val_mean_rank=947.0, val_hits@10=19.70]  

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 75: 100%|██████████| 1146/1146 [00:48<00:00, 23.71it/s, loss=94, v_num=44, val_loss=108.0, val_mean_rank=3.93e+3, val_hits@10=6.950]  

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
Restoring states from the checkpoint path at ./ckpt_WordNet18/emb_dim=20-lr=0.01-margin=2-p_norm=1\transe-dataset=0-epoch=22-val_mean_rank=1439-val_hits@10=17.2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)
Restored all states from the checkpoint file at ./ckpt_WordNet18/emb_dim=20-lr=0.01-margin=2-p_norm=1\transe-dataset=0-epoch=22-val_mean_rank=1439-val_hits@10=17.2.ckpt


Epoch 33: 100%|██████████| 1146/1146 [00:48<00:00, 23.71it/s, loss=285, v_num=45, val_loss=313.0, val_mean_rank=1.46e+3, val_hits@10=20.20]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 818 K 
1 | relation_mat | Embedding | 360   
-------------------------------------------
819 K     Trainable params
0         Non-trainable params
819 K     Total params
3.277     Total estimated model params size (MB)


Epoch 64: 100%|██████████| 1146/1146 [00:49<00:00, 23.37it/s, loss=2.28e+03, v_num=46, val_loss=2.41e+3, val_mean_rank=9.62e+3, val_hits@10=1.380]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs





LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 44: 100%|██████████| 1146/1146 [00:57<00:00, 19.89it/s, loss=49.4, v_num=47, val_loss=56.50, val_mean_rank=590.0, val_hits@10=30.20] 

GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 23: 100%|██████████| 1146/1146 [00:53<00:00, 21.35it/s, loss=284, v_num=48, val_loss=302.0, val_mean_rank=1.24e+3, val_hits@10=20.60]

GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 25: 100%|██████████| 1146/1146 [00:59<00:00, 19.16it/s, loss=2.23e+03, v_num=49, val_loss=2.35e+3, val_mean_rank=1.28e+3, val_hits@10=19.30]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [01:02<00:00, 18.26it/s, loss=178, v_num=50, val_loss=211.0, val_mean_rank=1.76e+4, val_hits@10=0.250]

GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [01:02<00:00, 18.26it/s, loss=391, v_num=51, val_loss=438.0, val_mean_rank=1.86e+4, val_hits@10=0.240]

GPU available: True (cuda), used: True





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Epoch 10: 100%|██████████| 1146/1146 [00:59<00:00, 19.27it/s, loss=2.34e+03, v_num=52, val_loss=2.48e+3, val_mean_rank=1.84e+4, val_hits@10=0.140]


### Single Training

In [96]:
single_train_config = {
    'batch_size': 128,
    'lr': 0.001,
    'emb_dim': 40,
    'p_norm': 2,
    'margin': 1
}

train_transe(config=single_train_config, main_path="./ckpt_WordNet18/", dataset="WordNet18")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


./ckpt_WordNet18/ckpt_WordNet18/emb_dim=40-lr=0.001-margin=1-p_norm=2
                                                                   

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params
-------------------------------------------
0 | entity_mat   | Embedding | 1.6 M 
1 | relation_mat | Embedding | 720   
-------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.554     Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  97%|█████████▋| 1117/1146 [00:38<00:00, 29.07it/s, loss=107, v_num=68]

### Prediction

In [32]:
def hit10_from_filename(filename):
    filename = filename.replace('.ckpt','')
    return float(filename.split('@10=')[1])

def predict_transe(config):
    trainer = pl.Trainer()

    dm = WordNetDataModule(batch_size=config['batch_size'], dataset=config['dataset'])

    # ckpt_path from model config
    path = f"./ckpt_{config['dataset']}/emb_dim={config['emb_dim']}-lr={config['learning_rate']}-margin={config['margin']}-p_norm={config['p_norm']}/"

    # take filename of model with highest hit@10
    filename = max(os.listdir(path), key=hit10_from_filename)
    
    model = TransE(emb_dim=config['emb_dim'],
                learning_rate=config['learning_rate'],
                margin=config['margin'],
                p_norm=config['p_norm'],
                dataset=config['dataset'])
    
    pred = trainer.predict(model, datamodule=dm, ckpt_path=os.path.join(path, filename))
    test_mean_rank, test_hits_at_10 = model.compute_epoch_metrics(pred, stage="predict", log_value=False)
    print("\n")
    print(f"test_mean_rank={test_mean_rank:.0f}, test_hits@10={test_hits_at_10:.2f}%")

# PREDICTION ON TEST SET

predict_config = {
    'batch_size': 128,
    'learning_rate': 0.001,
    'emb_dim': 40,
    'p_norm': 2,
    'margin': 1,
    'dataset': 'WordNet18'
}

predict_transe(config=predict_config)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Restoring states from the checkpoint path at ./ckpt_WordNet18/emb_dim=40-lr=0.001-margin=1-p_norm=2/transe-dataset=0-epoch=88-val_mean_rank=202-val_hits@10=61.2.ckpt
Loaded model weights from checkpoint at ./ckpt_WordNet18/emb_dim=40-lr=0.001-margin=1-p_norm=2/transe-dataset=0-epoch=88-val_mean_rank=202-val_hits@10=61.2.ckpt
  rank_zero_warn(


Predicting DataLoader 0:   0%|          | 0/40 [00:00<?, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


TypeError: 'NoneType' object is not iterable