In [1]:
import os
import numpy as np
import pandas as pd
from knowledge_graph_generator import KnowledgeGraphGenerator
from fusion import TransEFuser

In [2]:
# TransEFuser example
# -------------------
# TransEFuser fuses a knowledge graph made of noisy and potentially
# conflicting triples and produces a distance for each triple.  That
# distance is a measure of how "true" the triple is based on the
# training data.  The method is a modification of the TransE
# translational embedding technique.
#
# Walkthrough: read the full knowledge graph, narrow it to a toy subset,
# wrap the subset in a KnowledgeGraphGenerator, then hand that to
# TransEFuser and fuse.

# Full knowledge graph as a DataFrame.
raw_kg_df = pd.read_csv("data/raw_kg_df.csv")

# Toy dataset: only AMAZING movies.
toy_movie_list = [
    "m/terminator", "m/spaceballs", "m/dumb_and_dumber",
    "m/demolition_man", "m/hot_tub_time_machine",
]

# Keep only the rows whose entity_id is one of the toy movies.
toy_df = raw_kg_df[raw_kg_df["entity_id"].isin(toy_movie_list)]

# TransEFuser is constructed from a KnowledgeGraphGenerator, which in
# turn receives the toy DataFrame as its only known-data source.
kg_obj = KnowledgeGraphGenerator(known_data_list=[toy_df])
fuser = TransEFuser(kg_obj)

# Run the fusion; later cells read distances from this result.
fused_kg_df = fuser.fuse()

In [3]:
# Each triple's distance conveys how "true" it is given the training
# data.  For the two values below the fused triples seem spot on.
# Rows are printed in ascending order of distance.
print(fused_kg_df.loc[fused_kg_df["value"].eq("lowbrow")].sort_values(by="distance"))
print("\n")
print(fused_kg_df.loc[fused_kg_df["value"].eq("a murderous cyborg")].sort_values(by="distance"))

                  entity_id relation    value  distance
567  m/hot_tub_time_machine       is  lowbrow  1.864019
566       m/dumb_and_dumber       is  lowbrow  1.956428


         entity_id            relation               value  distance
1880  m/terminator  features the theme  a murderous cyborg  1.931387


In [4]:
# But the method generates a lot of noise: look at a random handful of
# fused triples.
#
# The draw uses a locally seeded generator so the rows shown here are the
# same on every Restart & Run All, and the global NumPy random state is
# left untouched.  Sampling is without replacement so no row is shown
# twice, and the sample size is capped at the number of rows available.
sample_rng = np.random.RandomState(0)
random_triples = sample_rng.choice(
    len(fused_kg_df),
    size=min(10, len(fused_kg_df)),
    replace=False,
)
# Positional lookup: random_triples holds row positions, not index labels.
fused_kg_df.iloc[random_triples]

Unnamed: 0,entity_id,relation,value,distance
1728,m/demolition_man,features the theme,the cities of los angeles,2.092196
2272,m/terminator,features the person,John,2.026059
1670,m/hot_tub_time_machine,features the theme,time travel plunges,1.618927
1314,m/hot_tub_time_machine,features the theme,the offset,1.775279
149,m/terminator,is,brilliant,1.981868
883,m/terminator,features the theme,its kind,1.767191
41,m/hot_tub_time_machine,is,high,2.061755
1134,m/hot_tub_time_machine,features the theme,grown men,1.892759
2201,m/hot_tub_time_machine,features the miscellaneous theme,' 80s,1.794854
806,m/hot_tub_time_machine,features the theme,expectations,1.95165
