In [1]:
import os
import numpy as np
import pandas as pd
from knowledge_graph_generator import KnowledgeGraphGenerator
from fusion import TransEFuser

In [2]:
# Load the source table that the knowledge-graph triples are taken from.
kg_count_year_path = "data/kg_count_year_df.csv"
df = pd.read_csv(kg_count_year_path)

In [3]:
# Keep only the three columns that make up a triple and write them to disk
# (without the index) so the example below can read them back.
triple_columns = ["entity_id", "relation", "value"]
df = df.loc[:, triple_columns]
df.to_csv(path_or_buf="data/raw_kg_df.csv", index=False)

In [4]:
# TransEFuser walkthrough
#
# A knowledge graph made of noisy, potentially conflicting triples can be
# fused with TransEFuser.  Fusing produces a distance for every triple, and
# that distance is a measure of how "true" the triple is based on the
# training data.  The method is a modification of the TransE translational
# embedding technique.
#
# TransEFuser is instantiated with a KnowledgeGraphGenerator object, so the
# steps are: read the triples, narrow them to a toy subset, wrap the subset
# in a generator, then fuse.

# Read the full knowledge graph DataFrame.
raw_kg_df = pd.read_csv("data/raw_kg_df.csv")

# Build a toy dataset restricted to a few AMAZING movies.
toy_movie_list = [
    "m/terminator",
    "m/spaceballs",
    "m/dumb_and_dumber",
    "m/demolition_man",
    "m/hot_tub_time_machine",
]
is_toy_movie = raw_kg_df["entity_id"].isin(toy_movie_list)
toy_df = raw_kg_df[is_toy_movie]

# Wrap the toy triples in a generator and create the fuser from it.
kg_obj = KnowledgeGraphGenerator(known_data_list=[toy_df])
fuser = TransEFuser(kg_obj)

# Fuse the knowledge graph.
fused_kg_df = fuser.fuse()

In [5]:
# Distance expresses how "true" a triple is given the training data.
# Some of the triples look spot on.
triple_values = fused_kg_df["value"]
lowbrow_rows = fused_kg_df[triple_values == "lowbrow"]
cyborg_rows = fused_kg_df[triple_values == "a murderous cyborg"]
print(lowbrow_rows.sort_values(by="distance"))
print("\n")
print(cyborg_rows.sort_values(by="distance"))

                  entity_id relation    value  distance
567  m/hot_tub_time_machine       is  lowbrow  1.549807
566       m/dumb_and_dumber       is  lowbrow  1.918002


         entity_id            relation               value  distance
1880  m/terminator  features the theme  a murderous cyborg   2.08909


In [6]:
# But the method generates a lot of noise.
# Show a random sample of fused triples to illustrate it.
#
# The sample is drawn from a locally seeded generator so that a fresh
# "Restart & Run All" displays the same rows, and without replacement so the
# same triple is never listed twice.  The sample size is capped at the number
# of rows because sampling without replacement cannot return more rows than
# exist.
sample_rng = np.random.RandomState(42)
sample_size = min(10, len(fused_kg_df))
# Positional row indices into fused_kg_df (used with .iloc below).
random_triples = sample_rng.choice(len(fused_kg_df), size=sample_size, replace=False)
fused_kg_df.iloc[random_triples]

Unnamed: 0,entity_id,relation,value,distance
161,m/demolition_man,is,amusing,1.947682
789,m/hot_tub_time_machine,features the theme,its moments,1.88548
2262,m/hot_tub_time_machine,features the miscellaneous theme,Private School,1.191115
2228,m/hot_tub_time_machine,features the miscellaneous theme,Snakes on a Plane,1.474899
893,m/dumb_and_dumber,features the theme,retaliation,2.079379
362,m/spaceballs,is,harmless,2.022146
324,m/demolition_man,is,ludicrous,2.06845
145,m/demolition_man,is,personal,1.867686
1979,m/hot_tub_time_machine,features the theme,"crummy-looking , exploitative , childishly gro...",1.580955
2222,m/terminator,features the miscellaneous theme,The Road Warrior,1.649705
