# Setup

In [1]:
import helper_tools.parser as parser
import importlib
import pandas as pd
import warnings

# Suppress all Python warnings for the remainder of the session.
warnings.filterwarnings("ignore")

# Re-import the parser module so the freshly loaded version is the one used below.
importlib.reload(parser)

# Load relations, entities and documents of the "train" split.
relation_df, entity_df, docs = parser.synthie_parser("train")

# De-duplicated (label, URI) lookup tables for entities and predicates.
entity_columns = ["entity", "entity_uri"]
predicate_columns = ["predicate", "predicate_uri"]
entity_set = entity_df[entity_columns].drop_duplicates()
predicate_set_df = relation_df[predicate_columns].drop_duplicates()

Fetching 27 files:   0%|          | 0/27 [00:00<?, ?it/s]

100%|██████████| 10/10 [00:00<00:00, 10425.81it/s]


Uploading Entities to Qdrant.


100%|██████████| 46/46 [00:06<00:00,  7.29it/s]


Uploading Predicates to Qdrant.


100%|██████████| 29/29 [00:04<00:00,  7.09it/s]


In [3]:
from langgraph.graph import StateGraph, START, END
from approaches.baseline.setup import cIEState, langfuse_handler
from approaches.baseline.agents.supervisor import agent as supervisor_agent
from approaches.baseline.agents.entity_extractor import agent as entity_extraction_agent
from approaches.baseline.agents.relation_extractor import agent as relation_extraction_agent
from approaches.baseline.agents.uri_detector import agent as uri_detection_agent

# Assemble the multi-agent graph over the shared cIEState.
builder = StateGraph(cIEState)

# Node name -> agent callable, registered in this order.
agent_nodes = {
    "supervisor": supervisor_agent,
    "entity_extraction_agent": entity_extraction_agent,
    "relation_extraction_agent": relation_extraction_agent,
    "uri_detection_agent": uri_detection_agent,
}
for node_name, node_agent in agent_nodes.items():
    builder.add_node(node_name, node_agent)

# Only the entry edge is declared here; no other edges are added in this cell.
# NOTE(review): presumably the agents route onward themselves (the supervisor
# output contains <goto> tags) — confirm in the agent modules.
builder.add_edge(START, "supervisor")

graph = builder.compile()

In [4]:
# Pick the document at position 3 as the single worked example.
target_doc = docs.iloc[3]
doc_id, text = target_doc["docid"], target_doc["text"]
text

'Lambda Mensae is a star in the constellation Mensa. It was discovered by Nicolas-Louis de Lacaille, and named after Table Mountain in South Africa. Mensa shares borders with Chamaeleon and Hydrus.'

In [6]:
# Run the compiled graph once on the selected text, starting with an empty message history.
initial_state = {"text": text, "messages": [], "debug": False}
run_config = {"callbacks": [langfuse_handler], "recursion_limit": 100}
response_state = graph.invoke(initial_state, config=run_config)

# Pretty Print Response State

In [7]:
# Show the input text followed by every message accumulated in the final state.
source_text = response_state["text"]
agent_messages = response_state["messages"]

print(f"""cIE for text: {source_text}

Messages:""")
for agent_message in agent_messages:
    print(agent_message)

cIE for text: Lambda Mensae is a star in the constellation Mensa. It was discovered by Nicolas-Louis de Lacaille, and named after Table Mountain in South Africa. Mensa shares borders with Chamaeleon and Hydrus.

Messages:

-- Supervisor Agent --
<goto>entity_extraction_agent</goto>

-- Entity Extraction Agent --
[Lambda Mensae, star, constellation Mensa, Nicolas-Louis de Lacaille, Table Mountain, South Africa, Mensa, Chamaeleon, Hydrus]

-- Supervisor Agent --
<goto>relation_extraction_agent</goto>

-- Relation Extraction Agent --
<relation>Lambda Mensae;is in;Mensa</relation>
<relation>Lambda Mensae;discovered by;Nicolas-Louis de Lacaille</relation>
<relation>Lambda Mensae;named after;Table Mountain</relation>
<relation>Mensa;shares borders with;Chamaeleon</relation>
<relation>Mensa;shares borders with;Hydrus</relation>
<relation>Lambda Mensae;located in;constellation Mensa</relation>
<relation>Table Mountain;located in;South Africa</relation>

-- Supervisor Agent --
<goto>uri_detecti

# Evaluation

In [9]:
from helper_tools.evaluation import parse_turtle, evaluate, get_uri_labels

In [10]:
import re

# Extract the Turtle payload that the final message wraps in <ttl>...</ttl>.
final_message = response_state["messages"][-1]
ttl_match = re.search(r'<ttl>(.*?)</ttl>', final_message, re.DOTALL)
if ttl_match is None:
    # re.search returns None when nothing matches; fail with a readable message
    # instead of "'NoneType' object has no attribute 'group'".
    raise ValueError(
        f"No <ttl>...</ttl> block found in the final message for doc {doc_id}: {final_message!r}"
    )
turtle_string = ttl_match.group(1)

# Predicted triples parsed from the Turtle output.
pred_relation_df = parse_turtle(turtle_string)

# Gold triples of the same document, restricted to the three URI columns.
triple_columns = ["subject_uri", "predicate_uri", "object_uri"]
doc_relation_df = relation_df[relation_df["docid"] == doc_id][triple_columns]

# Predicted triples that also occur in the gold set (exact match on all three URIs).
correct_relation_df = pred_relation_df.merge(doc_relation_df, on=triple_columns, how="inner")

In [11]:
# Display the predicted triples; the rendered output carries subject/predicate/object
# label columns next to the URI columns.
get_uri_labels(pred_relation_df, entity_set, predicate_set_df)

Unnamed: 0,subject_uri,predicate_uri,object_uri,subject,predicate,object
0,http://www.wikidata.org/entity/Q213360,http://www.wikidata.org/entity/P131,http://www.wikidata.org/entity/Q258,Table_Mountain,located in the administrative territorial entity,Unknown
1,http://www.wikidata.org/entity/Q9289,http://www.wikidata.org/entity/P47,http://www.wikidata.org/entity/Q10457,Mensa_(constellation),shares border with,Chamaeleon
2,http://www.wikidata.org/entity/Q3304236,http://www.wikidata.org/entity/P138,http://www.wikidata.org/entity/Q213360,Lambda_Mensae,named after,Table_Mountain
3,http://www.wikidata.org/entity/Q9289,http://www.wikidata.org/entity/P47,http://www.wikidata.org/entity/Q10416,Mensa_(constellation),shares border with,Hydrus
4,http://www.wikidata.org/entity/Q3304236,http://www.wikidata.org/entity/P61,http://www.wikidata.org/entity/Q202703,Lambda_Mensae,discoverer or inventor,Nicolas-Louis_de_Lacaille
5,http://www.wikidata.org/entity/Q3304236,http://www.wikidata.org/entity/P131,http://www.wikidata.org/entity/Q9289,Lambda_Mensae,located in the administrative territorial entity,Mensa_(constellation)


In [12]:
# Display the gold triples of the selected document in the same labelled form.
get_uri_labels(doc_relation_df, entity_set, predicate_set_df)

Unnamed: 0,subject_uri,predicate_uri,object_uri,subject,predicate,object
0,http://www.wikidata.org/entity/Q3304236,http://www.wikidata.org/entity/P59,http://www.wikidata.org/entity/Q9289,Lambda_Mensae,constellation,Mensa_(constellation)
1,http://www.wikidata.org/entity/Q3304236,http://www.wikidata.org/entity/P31,http://www.wikidata.org/entity/Q523,Lambda_Mensae,instance of,Star
2,http://www.wikidata.org/entity/Q9289,http://www.wikidata.org/entity/P61,http://www.wikidata.org/entity/Q202703,Mensa_(constellation),discoverer or inventor,Nicolas-Louis_de_Lacaille
3,http://www.wikidata.org/entity/Q9289,http://www.wikidata.org/entity/P47,http://www.wikidata.org/entity/Q10457,Mensa_(constellation),shares border with,Chamaeleon
4,http://www.wikidata.org/entity/Q9289,http://www.wikidata.org/entity/P138,http://www.wikidata.org/entity/Q213360,Mensa_(constellation),named after,Table_Mountain
5,http://www.wikidata.org/entity/Q9289,http://www.wikidata.org/entity/P47,http://www.wikidata.org/entity/Q10416,Mensa_(constellation),shares border with,Hydrus


In [13]:
# Display the triples found in both the prediction and the gold set (the inner join).
get_uri_labels(correct_relation_df, entity_set, predicate_set_df)

Unnamed: 0,subject_uri,predicate_uri,object_uri,subject,predicate,object
0,http://www.wikidata.org/entity/Q9289,http://www.wikidata.org/entity/P47,http://www.wikidata.org/entity/Q10457,Mensa_(constellation),shares border with,Chamaeleon
1,http://www.wikidata.org/entity/Q9289,http://www.wikidata.org/entity/P47,http://www.wikidata.org/entity/Q10416,Mensa_(constellation),shares border with,Hydrus


In [14]:
# Score the single document. The three returned values are labelled
# (precision, recall, f1_score) when the per-document results table is built later on.
evaluate(turtle_string, doc_id, relation_df)

(0.3333333333333333, 0.3333333333333333, 0.3333333333333333)

# Evaluation on Test

In [15]:
# Per-document result rows are collected in this plain list (despite the `_df` suffix)
# before being turned into a DataFrame further down.
evaluation_df = []

In [18]:
# Evaluate the pipeline on the first N_EVAL_DOCS documents, one [docid, *scores] row each.
# NOTE(review): `docs` comes from the "train" split loaded in the setup cell, although this
# section is titled "Evaluation on Test" — confirm which split is intended.
N_EVAL_DOCS = 10

# Start from a fresh list so re-running this cell neither duplicates rows nor fails
# after `evaluation_df` has been rebound to a DataFrame.
evaluation_df = []

for i in range(N_EVAL_DOCS):
    # Loop-local names keep the notebook-level `target_doc` / `doc_id` / `text`
    # (used by the single-document cells) from being overwritten.
    eval_doc = docs.iloc[i]
    eval_doc_id = eval_doc["docid"]
    eval_text = eval_doc["text"]
    print(f"doc: {eval_doc_id} - text: {eval_text}")

    response = graph.invoke(
        {"text": eval_text, "messages": [], "debug": False},
        config={"callbacks": [langfuse_handler], "recursion_limit": 100},
    )

    # The final message is expected to wrap the Turtle output in <ttl>...</ttl>.
    eval_final_message = response["messages"][-1]
    eval_ttl_match = re.search(r'<ttl>(.*?)</ttl>', eval_final_message, re.DOTALL)
    if eval_ttl_match is None:
        # re.search returns None on no match; name the offending document instead of
        # failing with an AttributeError on None.
        raise ValueError(
            f"No <ttl>...</ttl> block found in the final message for doc {eval_doc_id}: {eval_final_message!r}"
        )

    evaluation_df.append([eval_doc_id, *evaluate(eval_ttl_match.group(1), eval_doc_id, relation_df)])

doc: 0 - text: Corfe Castle railway station is a station on the Swanage Railway in the village of Corfe Castle, in the United Kingdom.
doc: 1 - text: Ricardo Lumengo is a Swiss politician. He was born in Fribourg and lives in Biel/Bienne. He works in Bern and speaks the Kongo language.
doc: 2 - text: The National Parks Project is a nature documentary film presented in the ImagineNATIVE Film and Media Arts Festival. It is in Inuktitut.
doc: 3 - text: Lambda Mensae is a star in the constellation Mensa. It was discovered by Nicolas-Louis de Lacaille, and named after Table Mountain in South Africa. Mensa shares borders with Chamaeleon and Hydrus.
doc: 4 - text: John Derek was an American actor, director and photographer. He was married to Pati Behrs. He died of cardiovascular disease.
doc: 5 - text: Beta2-adrenergic agonists are agonists of the beta-2 adrenergic receptor, which is found in early modern humans and is involved in bone resorption.
doc: 6 - text: A heating element is an electr

In [19]:
# Turn the collected [docid, precision, recall, f1_score] rows into a table and display it.
metric_columns = ["docid", "precision", "recall", "f1_score"]
evaluation_df = pd.DataFrame(evaluation_df, columns=metric_columns)
evaluation_df

Unnamed: 0,docid,precision,recall,f1_score
0,0,0.2,0.25,0.222222
1,1,0.666667,0.8,0.727273
2,2,0.333333,0.25,0.285714
3,3,0.333333,0.333333,0.333333
4,4,0.333333,1.0,0.5
5,5,0.5,0.666667,0.571429
6,6,0.166667,1.0,0.285714
7,7,0.25,0.25,0.25
8,8,0.5,0.5,0.5
9,9,0.5,0.4,0.444444


In [20]:
# Macro averages: the unweighted mean of each per-document metric column.
macro_f1 = evaluation_df["f1_score"].mean()
macro_precision = evaluation_df["precision"].mean()
macro_recall = evaluation_df["recall"].mean()

print(f'F1 (Macro Avg.): {macro_f1}')
print(f'Precision (Macro Avg.): {macro_precision}')
print(f'Recall (Macro Avg.): {macro_recall}')

F1 (Macro Avg.): 0.412012987012987
Precision (Macro Avg.): 0.3783333333333333
Recall (Macro Avg.): 0.545
