<a href="https://colab.research.google.com/github/odean111/KnowledgeGraph/blob/main/Embedding_Uncertain_Knowledge_Graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import TSV File

In [4]:
## Extract TSV and put into a dataframe

import csv
import pandas as pd
import numpy as np

# Column names for the four tab-separated fields of every row.
cols = ['ent_1', 'ent_2', 'rel', 'conf']

# `with` guarantees the handle is closed once the rows are read; newline=""
# is what the csv module asks for so it can handle line endings itself.
with open("Knowledge Graph - Known Relationship.tsv", newline="") as tsv_file:
  kg = csv.reader(tsv_file, delimiter="\t")
  # Blank lines come back from csv.reader as empty lists; skip them so every
  # kept row really carries the fields that later cells index into.
  ls_kg = [row for row in kg if row]

# All fields are still strings here (including `conf`).
df_kg = pd.DataFrame(data=ls_kg, columns=cols)

Create Set of Ground Truths - based on AND transitivity


In [5]:
# import required packages
import itertools


def unique_ent(df_input):
  """Collect every distinct entity that appears in a relationship frame.

  Args:
    df_input: DataFrame with `ent_1` and `ent_2` columns.

  Returns:
    1-D NumPy array of the distinct values found in either column, in the
    sorted order produced by `np.unique`.
  """
  # De-duplicate each column first, then merge and de-duplicate again.
  per_column = [df_input[column].unique() for column in ('ent_1', 'ent_2')]
  return np.unique(np.concatenate(per_column))


def return_near_nodes(input_node, df_input):
  """Return the entities that share at least one row with `input_node`.

  Args:
    input_node: entity to look up in the `ent_1` / `ent_2` columns.
    df_input: DataFrame with `ent_1` and `ent_2` columns.

  Returns:
    NumPy array (not a DataFrame) of the distinct entities appearing in any
    row that mentions `input_node`, with `input_node` itself removed.
  """
  # Rows where the node sits on either side of the relationship.
  touches_node = (df_input['ent_1'] == input_node) | (df_input['ent_2'] == input_node)
  candidates = unique_ent(df_input.loc[touches_node])

  # The node always shows up among its own rows; strip it from the result.
  own_positions = np.where(candidates == input_node)
  return np.delete(candidates, own_positions)


def return_unseen_relationships(np_near_ents, df_input):
  """List the entity pairs that have no row in `df_input` in either direction.

  Args:
    np_near_ents: iterable of entities; every 2-combination of it is checked.
    df_input: DataFrame with `ent_1` and `ent_2` columns.

  Returns:
    List of 2-tuples, in `itertools.combinations` order, for which neither
    (a, b) nor (b, a) appears as an (`ent_1`, `ent_2`) row.
  """
  # Build the lookup once instead of re-scanning the frame twice per pair.
  # Working on plain tuples also avoids chaining a second `.loc` whose boolean
  # mask was computed on the unfiltered frame, which depends on index
  # alignment and is fragile when index labels repeat.
  known_pairs = set(zip(df_input['ent_1'], df_input['ent_2']))

  return [
    pair
    for pair in itertools.combinations(np_near_ents, 2)
    if pair not in known_pairs and pair[::-1] not in known_pairs
  ]
    
    
def _edge_confidence(df_input, node, central_node):
  """Return the first `conf` value on a row linking `node` and `central_node`.

  The node -> central direction is tried first, then central -> node.
  Raises ValueError when no row links the two in either direction.
  """
  for src, dst in ((node, central_node), (central_node, node)):
    matches = df_input.loc[
      (df_input['ent_1'] == src) & (df_input['ent_2'] == dst), 'conf'
    ].tolist()
    if matches:
      return matches[0]
  raise ValueError(
    "no relationship between {!r} and {!r}".format(node, central_node)
  )


def truth_val_calc(unseen_relationships, df_input, central_node):
  """Combine confidences for pairs connected through an intermediary node.

  Args:
    unseen_relationships: iterable of pairs; items 0 and 1 of each are the
      two entities.
    df_input: DataFrame with `ent_1`, `ent_2` and `conf` columns; `conf` may
      hold strings, they are converted with `float`.
    central_node: the intermediary entity each member of a pair must be
      linked to.

  Returns:
    List of `[ent_1, ent_2, value]` lists where `value` is
    `float(conf(ent_1, central)) + float(conf(ent_2, central))`.

  Raises:
    ValueError: if either entity of a pair has no row with `central_node`.
      Previously such a pair silently reused the values left over from the
      preceding pair (or hit an unbound local on the first one).
  """
  ls_rel_tvals = []
  for ele in unseen_relationships:
    ent_1, ent_2 = ele[0], ele[1]
    tval_1 = _edge_confidence(df_input, ent_1, central_node)
    tval_2 = _edge_confidence(df_input, ent_2, central_node)
    # NOTE(review): the combined value is a plain sum and can exceed 1.0;
    # confirm this is the intended combination rule.
    ls_rel_tvals.append([ent_1, ent_2, float(tval_1) + float(tval_2)])

  return ls_rel_tvals


def return_ground_truths(input_df):
  """Derive values for every unseen pair reachable through a shared neighbour.

  Each entity in `input_df` is taken in turn as the central node: its
  neighbours are collected, the neighbour pairs with no direct row are kept,
  and `truth_val_calc` produces a value for each such pair.

  Args:
    input_df: DataFrame with `ent_1`, `ent_2` and `conf` columns.

  Returns:
    Flat list of `[ent_1, ent_2, value]` lists. A pair that is reachable
    through several central nodes appears once per central node; no
    de-duplication or minimum is applied here.
  """
  ground_truths = []
  for central in unique_ent(input_df).tolist():
    neighbours = return_near_nodes(central, input_df)
    missing_pairs = return_unseen_relationships(neighbours, input_df)
    ground_truths += truth_val_calc(missing_pairs, input_df, central)
  return ground_truths

In [6]:
# Compute [ent_1, ent_2, value] entries for entity pairs that have no direct
# row in the loaded graph but share a neighbour.
ls_ground_truths = return_ground_truths(df_kg)

In [34]:
# Known relationships without the relation label: [ent_1, ent_2, conf],
# with the confidence (4th field of each raw row) parsed as a float.
ls_kg_wo_rels = [[row[0], row[1], float(row[3])] for row in ls_kg]

In [35]:
# Show the derived entries first, then the known relationships, one list per line.
for listing in (ls_ground_truths, ls_kg_wo_rels):
  print(listing)

[['Phoebe', 'Ross', 1.83], ['Joey', 'Monica', 1.76], ['Joey', 'Rachel', 1.87], ['Monica', 'Ross', 1.35], ['Phoebe', 'Ross', 1.4], ['Chandler', 'Joey', 1.73], ['Chandler', 'Rachel', 1.37], ['Joey', 'Rachel', 1.3599999999999999]]
[['Rachel', 'Monica', 0.85], ['Rachel', 'Phoebe', 0.9], ['Rachel', 'Ross', 0.5], ['Ross', 'Chandler', 0.87], ['Ross', 'Joey', 0.86], ['Joey', 'Phoebe', 0.97], ['Monica', 'Phoebe', 0.79]]


Model

In [36]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import autograd

In [99]:
class Network(nn.Module):
  """A single fully connected layer followed by a ReLU.

  Args:
    input_dims: number of input features expected by the linear layer.
    output_dims: number of features the layer produces.
  """

  def __init__(self, input_dims, output_dims):
    super().__init__()
    self.fc1 = nn.Linear(input_dims, output_dims)

  def forward(self, t):
    """Apply the linear layer to `t` and clamp negative results to zero."""
    return F.relu(self.fc1(t))

In [101]:
network = Network(7, 3)

# Rows 0, 1 and 2 of the 7x7 identity are the one-hot vectors for those indices.
basis = torch.eye(7, dtype=torch.float32)
torch_1, torch_2, torch_3 = basis[0], basis[1], basis[2]

# Run each one-hot vector through the same layer.
output_1, output_2, output_3 = (
  network(vec) for vec in (torch_1, torch_2, torch_3)
)

# Element-wise product of the first two outputs, then a dot product with the third.
hadamard_prod = output_1 * output_2
output = torch.dot(hadamard_prod, output_3)

print(output)

tensor(0.0351, grad_fn=<DotBackward>)
