In [1]:
import torch
import pandas as pd
import numpy as np
import torch

In [2]:
# constants:
# k_dim -- dimensionality k of every entity/relation embedding vector
#          (the length passed to torch.rand when vectors are initialised).
k_dim = 20
# gamma -- the margin added inside the hinge (relu) of the ranking loss.
gamma = 2

In [77]:
# Mount Google Drive into the Colab VM; the training file is later opened
# from a path under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


We are working with several categories of objects. First of all we have the database:
$$D = \{(e^l, r, e^r)\} \subseteq E \times R \times E =: \Omega$$
where $e^l$ and $e^r$ are entities, $r$ is a relation, and $E$, $R$ are the sets of all entities and all relations. Second, the model we train represents each entity and each relation by a vector:
$$\overline{e^l}, \overline{e^r}, \overline{r} \in \mathbb{R}^k.$$

Our main object is a triple $s = (e^l, r, e^r) \in D$. Pairing it with a corrupted triple $s' = (e'^l, r, e'^r) \in \Omega \backslash D$ (a copy of $s$ in which one of the two entities has been replaced), we define its loss to be
$$l(s) = relu (\gamma + ||\overline{e^l} + \overline{r} - \overline{e^r}|| - ||\overline{e'^l} + \overline{r} - \overline{e'^r}||)$$

(where $\gamma$ is some pre-defined margin).

If we put $\overline{D} = D \cup \{(e^l, r, e^r) \; : \; e^l \in D \vee e^r \in D\}$ the overall loss will be 
$$\mathcal{L} = \sum_{s \in \overline{D}} l(s)$$

In [57]:
# Reading the dataset and extracting the sets of entities and relations.
#
# Data structures built here:
#   dataset  -- dict keyed by the string "ent_l ent_r"; each value is the set
#               of relations observed between that ordered pair of entities
#   ent_vect -- dict mapping every entity name to a random vector of length k_dim
#   rel_vect -- dict mapping every relation name to a random vector of length k_dim
#
# The vectors are the initial values of the embeddings trained further below.

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# f = open('/train.txt', 'r')

dataset = {}
ent_vect = {}
rel_vect = {}

# `with` guarantees the file is closed, even if parsing fails part-way through.
with open('/content/drive/MyDrive/MLDL/Release/train.txt', "r") as f:
  for line in f:
    # Strip only the line terminator: slicing off the last character would
    # corrupt the final entity when the file does not end with a newline.
    fields = line.rstrip("\n").split('\t')
    if len(fields) != 3:
      # Skip blank/malformed lines instead of silently dropping the rest of the file.
      continue
    ent_l, rel, ent_r = fields

    dataset.setdefault(ent_l + " " + ent_r, set()).add(rel)

    # Give every entity / relation a random initial vector the first time it is seen.
    if ent_l not in ent_vect:
      ent_vect[ent_l] = torch.rand(k_dim, device = device)

    if ent_r not in ent_vect:
      ent_vect[ent_r] = torch.rand(k_dim, device = device)

    if rel not in rel_vect:
      rel_vect[rel] = torch.rand(k_dim, device = device)
# print(rel_vect['/tv/tv_program/regular_cast./tv/regular_tv_appearance/actor']) #checking that everything's OK

tensor([0.9122, 0.1428, 0.1529, 0.2793, 0.6300, 0.3247, 0.0249, 0.6887, 0.7007,
        0.0142, 0.1191, 0.0099, 0.5673, 0.9276, 0.5176, 0.3062, 0.6451, 0.4297,
        0.9711, 0.5383], device='cuda:0')


In [18]:
import random
import math


# Reminder of the module-level containers built by the data-loading cell.
# NOTE(review): this is a bare string literal rather than a comment, so as the
# last expression of the cell its repr is echoed as the cell output.
# NOTE(review): `dataset` is actually keyed by the single string "ent_l ent_r"
# with a set of relations as value, not stored as a three-element record.
''' 
dataset = {} -- consists of      [num_1, num_2, relations(num_1, num_2)    ]

ent_vect = {} -- consists of      [entity, vector],   vectors initialized to be random 

rel_vect = {} -- consists of      [relation, vector],   vectors initialized to be random 
'''

' \ndataset = {} -- consists of      [num_1, num_2, relations(num_1, num_2)    ]\n\nent_vect = {} -- consists of      [entity, vector],   vectors initialized to be random \n\nrel_vect = {} -- consists of      [relation, vector],   vectors initialized to be random \n'

In [31]:
def relu(x):
  """Scalar rectifier: return x itself unless it is negative, in which case return 0."""
  return 0 if 0 > x else x
def dist(x,y):
  """Euclidean (L2) distance between two tensors of the same shape.

  The previous implementation called torch.transpose without its required
  dim0/dim1 arguments and therefore raised TypeError on every call.

  Returns:
    The distance as a plain Python float.
  """
  # With the default arguments the input is flattened and its 2-norm is taken.
  return torch.linalg.norm(x - y).item()

In [75]:
def making_batch(size_b, limit_counter = 15): #benedict minibatch
  """Sample a batch of (true triple, corrupted triple) pairs.

  A true triple (e1, rel, e2) is drawn from `dataset`; corrupted candidates are
  produced by replacing its head or its tail with a random entity. A candidate
  is accepted only when its entity pair is itself a key of `dataset` and `rel`
  is not among the relations stored for that pair. Several corrupted triples
  may be emitted for the same true triple.

  Args:
    size_b: number of pairs to return.
    limit_counter: number of rejected candidates tolerated for one true triple
      before a new true triple is drawn.

  Returns:
    A list of exactly `size_b` tuples ((e1, rel, e2), (e1', rel, e2'));
    empty when size_b <= 0.
  """
  out = []
  # Materialise the sampling pools once instead of on every draw.
  pairs = list(dataset.items())
  entities = list(ent_vect)

  while len(out) < size_b:
    name, rels = random.choice(pairs) # choosing random pair of entities
    rel = random.choice(list(rels)) # choosing random relation between them
    e1, e2 = name.split() # names of the two entities

    failures = 0
    # The size check here stops the batch from growing past size_b while
    # further corruptions of the same true triple keep being accepted.
    while failures < limit_counter and len(out) < size_b:
      side = random.randint(1,2)
      ent = random.choice(entities)
      v = (ent + " " + e2) if side == 1 else (e1 + " " + ent)

      known = dataset.get(v)
      if known is not None and rel not in known:
        corrupted = (ent, rel, e2) if side == 1 else (e1, rel, ent)
        out.append(((e1, rel, e2), corrupted))
        continue # an accepted candidate does not count as a failure
      failures += 1
  return out

# a = making_batch(3)
# print(a)

In [44]:
def batch_true(size_b):
  """Sample `size_b` true triples (ent1, rel, ent2) from `dataset`, with replacement.

  Each draw picks a random entity pair and then a random relation stored for it.

  Returns:
    A list of `size_b` tuples (ent1, rel, ent2).
  """
  # Build the list of (key, relations) once rather than on every draw.
  pairs = list(dataset.items())
  out = []
  for _ in range(size_b):
    name, rels = random.choice(pairs)
    rel = random.choice(list(rels))
    ent1, ent2 = name.split()
    out.append((ent1, rel, ent2))
  return out

def error(e1, r, e2):
  """Distance between the translated head (vector of e1 plus vector of r) and the vector of e2."""
  translated_head = ent_vect[e1] + rel_vect[r]
  tail = ent_vect[e2]
  return dist(translated_head, tail)

def find_cor(e1, r, e2, flag): #another function that samples a corrupted triplet for a given one
  """Rejection-sample a corrupted version of the triple (e1, r, e2).

  Args:
    e1, r, e2: head entity, relation and tail entity of the original triple.
    flag: 1 replaces the head with a random entity; any other value replaces
      the tail.

  Returns:
    (ent, r, e2) when flag == 1, otherwise (e1, r, ent). The candidate is
    accepted only if its entity pair is a key of `dataset` and `r` is not one
    of the relations stored for that pair.

  Note:
    The loop has no retry limit, so it does not terminate when no entity
    satisfies the acceptance condition.
  """
  # Build the candidate pool once rather than on every rejected sample.
  entities = list(ent_vect)
  while True:
    ent = random.choice(entities)
    if flag == 1:
      candidate = (ent, r, e2)
    else:
      candidate = (e1, r, ent)

    known = dataset.get(candidate[0] + " " + candidate[2])
    if known is not None and r not in known:
      return candidate
    
# print(find_cor("/m/017dcd", "/tv/tv_program/regular_cast./tv/regular_tv_appearance/actor", "/m/06v8s0", 2)) #testing
  
def corrupted_loss(e1, r, e2): #loss for a single point
  """Margin loss of one triple against one randomly corrupted copy of it.

  The side to corrupt (1 = head, 2 = tail) is chosen at random, so no
  decision has to be made by the caller.
  """
  side = random.randint(1,2)
  el_, r_, er_ = find_cor(e1, r, e2, side)
  true_error = error(e1, r, e2)
  corrupted_error = error(el_, r_, er_)
  return relu(gamma + true_error - corrupted_error)

In [28]:
# Sanity check of the GPU runtime.
# NOTE(review): the result of is_available() is not the last expression of the
# cell, so it is not displayed; only the nvidia-smi report appears in the output.
torch.cuda.is_available()
!nvidia-smi

Sun Mar 26 20:57:05 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   46C    P0    28W /  70W |    579MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [71]:
#here are some additional functions to utilize while training

def norming():
  """Rescale every vector stored in ent_vect to unit norm; rel_vect is not touched."""
  ent_vect.update({key: vec / torch.norm(vec) for key, vec in ent_vect.items()})

lam = 0.01 #learning rate
def update(batch): #function that updates vectors
  """One gradient-descent pass over a batch of true triples.

  For every true triple (h, r, t) a corrupted triple (h', r, t') is sampled
  with find_cor, and the vectors are moved so that the squared distance
  ||h + r - t||^2 decreases while ||h' + r - t'||^2 increases.

  Fixes relative to the previous version:
    * the step on the true triple had inverted signs (it increased the distance);
    * the step for the corrupted triple was applied to the true triple's
      entities instead of the corrupted triple's;
    * the difference vector was recomputed after each in-place change, so the
      three vectors of one triple were updated with different gradients.

  NOTE(review): the hinge/margin (gamma) is not checked here, so every sampled
  pair contributes a step, as before -- confirm whether that is intended.

  Args:
    batch: iterable of (head, relation, tail) name triples, e.g. the output of
      batch_true.
  """
  for true_val in batch:
    a = random.randint(1,2)
    h, r, t = true_val[0], true_val[1], true_val[2]
    h_, r_, t_ = find_cor(h, r, t, a)

    # Both gradients are evaluated before any vector is modified.
    step_pos = lam*2*(ent_vect[h] + rel_vect[r] - ent_vect[t])
    step_neg = lam*2*(ent_vect[h_] + rel_vect[r_] - ent_vect[t_])

    # Pull the true triple together: descend on ||h + r - t||^2.
    ent_vect[h] -= step_pos
    rel_vect[r] -= step_pos
    ent_vect[t] += step_pos

    # Push the corrupted triple apart: ascend on ||h' + r - t'||^2.
    ent_vect[h_] += step_neg
    rel_vect[r_] += step_neg
    ent_vect[t_] -= step_neg

def alt_upd (batch): #function that updates vectors in another way
  """One gradient-descent pass over a batch of (true, corrupted) triple pairs.

  The vectors are moved so that the squared distance ||h + r - t||^2 of the
  true triple decreases while that of the corrupted triple increases.

  Fixes relative to the previous version:
    * the step on the true triple had inverted signs (it increased the distance);
    * the step for the corrupted triple was applied to the true triple's
      entities instead of the corrupted triple's;
    * the corrupted-triple gradient was computed after the true-triple step
      had already moved the shared vectors.

  NOTE(review): the hinge/margin (gamma) is not checked here, so every pair
  contributes a step, as before -- confirm whether that is intended.

  Args:
    batch: iterable of ((h, r, t), (h', r', t')) name triples, e.g. the output
      of making_batch.
  """
  for true_val in batch:
    e1 = true_val[0][0].strip()
    e2 = true_val[0][2].strip()
    r = true_val[0][1].strip()

    e1_ = true_val[1][0].strip()
    e2_ = true_val[1][2].strip()
    r_ = true_val[1][1].strip()

    # Both gradients are evaluated before any vector is modified.
    shift_1 = lam*2*(ent_vect[e1] + rel_vect[r] - ent_vect[e2])
    shift_2 = lam*2*(ent_vect[e1_] + rel_vect[r_] - ent_vect[e2_])

    # Pull the true triple together.
    ent_vect[e1] -= shift_1
    rel_vect[r] -= shift_1
    ent_vect[e2] += shift_1

    # Push the corrupted triple apart.
    ent_vect[e1_] += shift_2
    rel_vect[r_] += shift_2
    ent_vect[e2_] -= shift_2

In [None]:
# Training driver, "article" variant: each iteration re-normalises the entity
# vectors, samples true triples and lets update() draw the corrupted ones.

n_iter = 1    # number of training iterations
size_b = 10   # true triples sampled per iteration

for step in range(n_iter):
  print(step)
  norming()
  batch = batch_true(size_b)
  update(batch)

In [None]:
# Training driver, alternative variant: each iteration re-normalises the entity
# vectors and trains on pre-built (true, corrupted) pairs from making_batch().

n_iter = 10   # number of training iterations
size_b = 10   # (true, corrupted) pairs sampled per iteration

for step in range(n_iter):
  print(step)
  norming()
  batch = making_batch(size_b)
  alt_upd(batch)

In [None]:
# print(ent_vect)
# print(rel_vect)