In [1]:
from gensim.models import KeyedVectors
from os import path
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
import numpy as np
from torch_geometric.data import Data

In [2]:
embedded_relations = KeyedVectors.load_word2vec_format(
        "embeddings/relation_embedding_le_FB15k_237_1024.bin",
        binary=True,
    )

In [3]:
sorted_embedding = []
for i in range(0, len(embedded_relations.vectors)):
    sorted_embedding.append(embedded_relations.get_vector(str(i)))
embedded_relations = torch.tensor(sorted_embedding, dtype=torch.float)

In [4]:
le_relation = LabelEncoder()
le_relation.classes_ = np.load(path.join('embeddings','le_relation_classes.npy'), allow_pickle=True)


## Evaluating Graph Star Multi Relational

In [5]:
model = torch.load("output/FB15k_237.pkl", map_location=torch.device('cpu'))

In [6]:
model.eval()
model

GraphStar(
  (fl): Linear(in_features=16, out_features=256, bias=True)
  (star_init): StarAttn(
    (Wq): Linear(in_features=256, out_features=256, bias=True)
    (Wk): Linear(in_features=256, out_features=256, bias=True)
    (Wv): Linear(in_features=256, out_features=256, bias=True)
    (sLayerNorm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  )
  (conv_list): ModuleList(
    (0): GraphStarConv(256, 256, heads=4)
    (1): GraphStarConv(256, 256, heads=4)
    (2): GraphStarConv(256, 256, heads=4)
  )
  (star_attn_list): ModuleList(
    (0): StarAttn(
      (Wq): Linear(in_features=256, out_features=256, bias=True)
      (Wk): Linear(in_features=256, out_features=256, bias=True)
      (Wv): Linear(in_features=256, out_features=256, bias=True)
      (sLayerNorm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    )
    (1): StarAttn(
      (Wq): Linear(in_features=256, out_features=256, bias=True)
      (Wk): Linear(in_features=256, out_features=256, bias=True)
      (

In [7]:
# Read data and make dataframes
name = ['entity', 'id']
entity_id = pd.read_csv('./data/FB15k/entities.txt', sep='\t', header=None, names=name, engine='python')
all_entities = entity_id['entity'].values

name = ['relation', 'id']
relation_id = pd.read_csv('./data/FB15k/relations.txt', sep='\t', header=None, names=name, engine='python')
all_relations = relation_id['relation'].values

# Read RDF Triples
name = ['head', 'relation', 'tail']
# concat all data 
train = pd.read_csv('./data/FB15k_237/train.txt', sep='\t', header=None, names=name, engine='python')
valid = pd.read_csv('./data/FB15k_237/valid.txt', sep='\t', header=None, names=name, engine='python')
test = pd.read_csv('./data/FB15k_237/test.txt', sep='\t', header=None, names=name, engine='python')
data = pd.concat([train, valid])
data = pd.concat([data, test])

head = data['head'].values
tail = data['tail'].values
relations = data['relation'].values

In [8]:
# fit entity encoder
le_entity = LabelEncoder()
le_entity.fit(all_entities)

# string list to int array using LabelEncoder on complete data set
heads = le_entity.transform(head)
tails = le_entity.transform(tail)
relations = le_relation.transform(relations)

edge_attributes = torch.tensor(relations, dtype=torch.long)
edge_index = torch.tensor([heads, tails], dtype=torch.long)
unique_entities = torch.tensor(np.unique(edge_index.reshape(edge_index.shape[-1]*2, 1)), dtype=torch.float)

dataset = Data(x=unique_entities, edge_type=edge_attributes, edge_index=edge_index)

In [13]:
!pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

Collecting sparqlwrapper
  Using cached SPARQLWrapper-1.8.5-py3-none-any.whl (26 kB)
Installing collected packages: sparqlwrapper
Successfully installed sparqlwrapper-1.8.5


In [9]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON

def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def freebase_parser(freebase_id):
    endpoint_url = "https://query.wikidata.org/sparql"

    query = \
    '''SELECT ?sLabel WHERE { 
        ?s wdt:P646 "''' + freebase_id + '''".
        SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    LIMIT 1'''
    res = get_results(endpoint_url, query)
    if len(res['results']['bindings']) == 0:
        return "No result"
    else:
        return get_results(endpoint_url, query)['results']['bindings'][0]['sLabel']['value']

def common_to_fb(common_name):
    endpoint_url = "https://query.wikidata.org/sparql"

    query = '''
    SELECT ?fbid WHERE { 
        ?s wdt:P373 "''' + common_name + '''".
    OPTIONAL {
        ?s wdt:P646 ?fbid .
        }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
    LIMIT 1'''
    res = get_results(endpoint_url, query)
    try:
        res = res['results']['bindings'][0]['fbid']['value']
        return res
    except:
        return common_name + "-1"
    
def inverse_transform(head, tail, rel):
    head = le_entity.inverse_transform([head])
    tail = le_entity.inverse_transform([tail])
    rel = le_relation.inverse_transform([rel])
    return head[0], tail[0], rel[0]


def rdf2txt(head, tail, rel):
    head, tail, rel = inverse_transform(head, tail, rel)
    head = freebase_parser(head)
    tail = freebase_parser(tail)
    return head, tail, str(rel)

In [10]:
def pred(head=True, index=0, sig=True, emb_sig=True):
    emb = model.z
    edge_index = dataset.edge_index.T[index].T
    edge_type = dataset.edge_type[index]
    
    h, t, r = rdf2txt(edge_index[0], edge_index[1], edge_type)
    relation =  model.RW[edge_type]
    if head:
        out_text = "predicted heads:"
        head = emb
        tail = emb[edge_index[1]]
    else:
        out_text = "predicted tails:"
        head = emb[edge_index[0]]
        tail = emb
    p = head*relation*tail

    score = p.sum(dim=1)
    
    pred = score.detach().numpy().argsort()[-10:][::-1]
    
    print(f"Original h:{h}, r: {r}, t:{t}")
    print(out_text)
    for l in pred:
        h, t, r = rdf2txt(edge_index[0], l, edge_type)
        print (t)


In [11]:
pred(head=True, index=5243, sig=True, emb_sig=False)

Original h:Joe Biden, r: /people/person/profession, t:politician
predicted heads:
Tom Skerritt
Tampa Bay Rays
Evan Rachel Wood
Tennessee Williams
Swindon
The Men Who Stare at Goats
Face/Off
Amour
Rupert Holmes
Deportivo Toluca F.C.


In [12]:
# Logits embedding [13292, 256] -> [unique nodes x hidden layer]
z = model.z

def experiment(head_pred=False, index=0):
    
    # Edge index between 2 nodes
    edge_index = dataset.edge_index.T[index].T
    # relation type
    edge_type = dataset.edge_type[index]

    h, t, r = rdf2txt(edge_index[0], edge_index[1], edge_type)
    print(
    f"""
    Original data:
    Head: {h}
    Relation: {r}
    Tail: {t}
    """)
    
    
    relation = model.RW[edge_type]
    if head_pred:
        out_text = "Predicted data (head pred)"
        head = z
        tail = z[edge_index[1]]
    else:
        out_text = "Predicted data (tail pred)"
        head = z[edge_index[0]]
        tail = z    
    
    p = head * relation * tail
    summed = p.sum(dim=1).detach().numpy()

    if head_pred:
        target = summed[edge_index[0]]
        raw_rank = np.where(summed == target)
        pred = summed.argsort()[-1]
        h, t, r = rdf2txt(pred, edge_index[1], edge_type)

    else:
        target = summed[edge_index[1]]
        raw_rank = np.where(summed == target)
        pred = summed.argsort()[-1]
        h, t, r = rdf2txt(edge_index[0], pred, edge_type)
    print(
    f""" 
    {out_text}:  
    Head: {h}  
    Relation: {r}  
    Tail: {t}  
    Rank of target: {raw_rank[0][0]}
    """)

In [13]:
experiment(head_pred=True, index=5243)


    Original data:
    Head: Joe Biden
    Relation: /people/person/profession
    Tail: politician
    
 
    Predicted data (head pred):  
    Head: Tom Skerritt  
    Relation: /people/person/profession  
    Tail: politician  
    Rank of target: 134
    


# FAKENEWSNET PRED WITH FB15K

In [15]:
from glob import glob
paths = glob("./data/fakenewsnet/*")

In [16]:
paths

['./data/fakenewsnet/clean_fakenewsnet_triple.csv',
 './data/fakenewsnet/fakenewsnet_triples.csv']

In [19]:
orginal = pd.read_csv(paths[0], index_col=0)
processed = pd.read_csv(paths[1], index_col=0)

In [20]:
processed

Unnamed: 0,text_id,head,relation,tail,fb_head,fb_tail,label
0,0,Bill Clinton,organization/role/leaders./organization/leader...,NPR,/m/0157m,/m/0c0sl,False
1,1,Bill Clinton,people/person/employment_history./business/emp...,President of the United States,/m/0157m,/m/02mjmr,False
2,1,Marsha Blackburn,people/person/employment_history./business/emp...,Rep,/m/01fnkt,/m/02qkv0r,False
3,1,Roger Bate,organization/role/leaders./organization/leader...,American Enterprise Institute,/m/0b23zg,/m/0p8q4,False
4,1,Hillary Clinton,people/person/employment_history./business/emp...,Candidate,/m/0d06m5,/m/07r9r0,False
...,...,...,...,...,...,...,...
995,415,William H. Seward,organization/role/leaders./organization/leader...,Republican Party,/m/0k_2z,/m/085srz,True
996,415,John Tyler,people/person/employment_history./business/emp...,President of the United States,/m/042dk,/m/02mjmr,True
997,415,Donald Trump,people/person/employment_history./business/emp...,President of the United States,/m/0cqt90,/m/02mjmr,True
998,415,Hillary Clinton,people/person/employment_history./business/emp...,President of the United States,/m/0d06m5,/m/02mjmr,True


In [34]:
processed.set_index(["text_id"])

Unnamed: 0_level_0,head,relation,tail,fb_head,fb_tail,label
text_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,Bill Clinton,organization/role/leaders./organization/leader...,NPR,/m/0157m,/m/0c0sl,False
1,Bill Clinton,people/person/employment_history./business/emp...,President of the United States,/m/0157m,/m/02mjmr,False
1,Marsha Blackburn,people/person/employment_history./business/emp...,Rep,/m/01fnkt,/m/02qkv0r,False
1,Roger Bate,organization/role/leaders./organization/leader...,American Enterprise Institute,/m/0b23zg,/m/0p8q4,False
1,Hillary Clinton,people/person/employment_history./business/emp...,Candidate,/m/0d06m5,/m/07r9r0,False
...,...,...,...,...,...,...
415,William H. Seward,organization/role/leaders./organization/leader...,Republican Party,/m/0k_2z,/m/085srz,True
415,John Tyler,people/person/employment_history./business/emp...,President of the United States,/m/042dk,/m/02mjmr,True
415,Donald Trump,people/person/employment_history./business/emp...,President of the United States,/m/0cqt90,/m/02mjmr,True
415,Hillary Clinton,people/person/employment_history./business/emp...,President of the United States,/m/0d06m5,/m/02mjmr,True


In [43]:
def transform_entity(entity):
    try:
        le_entity.transform(entity)
    except:
        print("nope")

In [46]:
q.fb_head.apply(lambda x: transform_entity(x))


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


text_id
0      None
1      None
1      None
1      None
1      None
       ... 
415    None
415    None
415    None
415    None
416    None
Name: fb_head, Length: 1000, dtype: object

In [47]:
q

Unnamed: 0_level_0,head,relation,tail,fb_head,fb_tail,label
text_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,Bill Clinton,organization/role/leaders./organization/leader...,NPR,/m/0157m,/m/0c0sl,False
1,Bill Clinton,people/person/employment_history./business/emp...,President of the United States,/m/0157m,/m/02mjmr,False
1,Marsha Blackburn,people/person/employment_history./business/emp...,Rep,/m/01fnkt,/m/02qkv0r,False
1,Roger Bate,organization/role/leaders./organization/leader...,American Enterprise Institute,/m/0b23zg,/m/0p8q4,False
1,Hillary Clinton,people/person/employment_history./business/emp...,Candidate,/m/0d06m5,/m/07r9r0,False
...,...,...,...,...,...,...
415,William H. Seward,organization/role/leaders./organization/leader...,Republican Party,/m/0k_2z,/m/085srz,True
415,John Tyler,people/person/employment_history./business/emp...,President of the United States,/m/042dk,/m/02mjmr,True
415,Donald Trump,people/person/employment_history./business/emp...,President of the United States,/m/0cqt90,/m/02mjmr,True
415,Hillary Clinton,people/person/employment_history./business/emp...,President of the United States,/m/0d06m5,/m/02mjmr,True
