In [1]:
import os
import numpy as np
import pandas as pd
from knowledge_graph_generator import KnowledgeGraphGenerator
from fusion import TransEFuser

In [2]:
# TransEFuser example.
# A TransEFuser takes a knowledge graph whose triples may be noisy, or may
# even conflict with one another, and produces a distance for every triple.
# That distance is a measure of how "true" the triple is based on the
# training data.  The method is a modification of the TransE translational
# embedding technique.
#
# Start from the full knowledge graph DataFrame.
raw_kg_df = pd.read_csv("data/raw_kg_df.csv")
# Narrow it down to a toy dataset containing only AMAZING movies.
toy_movie_list = [
    "m/terminator",
    "m/spaceballs",
    "m/dumb_and_dumber",
    "m/demolition_man",
    "m/hot_tub_time_machine",
]
toy_df = raw_kg_df[raw_kg_df["entity_id"].isin(toy_movie_list)]
# A TransEFuser is instantiated with a KnowledgeGraphGenerator object, so
# wrap the toy data in one before building the fuser.
kg_obj = KnowledgeGraphGenerator(known_data_list=[toy_df])
fuser = TransEFuser(kg_obj)
# Run the fusion to obtain the fused knowledge graph.
fused_kg_df = fuser.fuse()

In [3]:
# Distance expresses how "true" a triple is given the training data.
# Some triples look spot on: list them by value, closest distance first.
print(
    fused_kg_df.loc[fused_kg_df["value"] == "lowbrow"]
    .sort_values(by="distance")
)
print("\n")
print(
    fused_kg_df.loc[fused_kg_df["value"] == "a murderous cyborg"]
    .sort_values(by="distance")
)

                  entity_id relation    value  distance
567  m/hot_tub_time_machine       is  lowbrow  1.619098
566       m/dumb_and_dumber       is  lowbrow  2.046808


         entity_id            relation               value  distance
1880  m/terminator  features the theme  a murderous cyborg  1.916045


In [4]:
# But the method generates a lot of noise.
# Show a random sample of fused triples.  A locally seeded generator keeps
# the displayed rows identical across "Restart & Run All" without touching
# NumPy's global random state, and replace=False guarantees that the same
# triple is never shown twice.
sample_rng = np.random.RandomState(0)
# replace=False cannot draw more rows than exist, so cap the sample size.
sample_size = min(10, len(fused_kg_df))
random_triples = sample_rng.choice(len(fused_kg_df), size=sample_size, replace=False)
fused_kg_df.iloc[random_triples]

Unnamed: 0,entity_id,relation,value,distance
533,m/demolition_man,is,bodily,1.90235
1723,m/dumb_and_dumber,features the theme,the days of john belushi,1.923488
1400,m/dumb_and_dumber,features the theme,some initial struggle,1.827668
101,m/hot_tub_time_machine,is,silly,1.977551
418,m/hot_tub_time_machine,is,idiotic,1.774883
1577,m/spaceballs,features the theme,the villainous dark helmet,2.055307
111,m/dumb_and_dumber,is,modern,2.03716
1050,m/hot_tub_time_machine,features the theme,high concept,1.912786
1928,m/hot_tub_time_machine,features the theme,a cute and silly romp,1.414594
1906,m/hot_tub_time_machine,features the theme,a platform for retro fun,1.67758
