In [18]:
import re
from owlrl import Graph, DeductiveClosure, OWLRL_Semantics, rdflib
import pandas as pd

In [19]:
# Extension-less path stems; the cells that open these files append ".rdf".
original_filename = "./mr-en-20250605072510"
# The cleaned copy is stored next to the original under a "_CLEANED" suffix.
cleaned_filename = original_filename + "_CLEANED"

In [None]:
# Load the raw RDF/XML export as text so it can be patched before parsing.
# `file_content` is deliberately NOT pre-initialised to "": if the read fails,
# the name stays unbound (or keeps its previous value) instead of becoming an
# empty string that a later cell would happily write out as the "cleaned" file.
with open(f"{original_filename}.rdf", encoding="utf-8") as original_file:
    file_content = original_file.read()

In [None]:
# Matches ONE double-quoted http(s) attribute value at a time.
# The value is matched with [^"\n]+ rather than a greedy `.+`: with `.+`, a
# line carrying several attributes produced a single match running from the
# first URL to the last quote on the line, so the spaces *between* attributes
# were also rewritten to "%20" and the XML was corrupted.
url_attribute_pattern = r'="https?://[^"\n]+"'

# Percent-encode the spaces inside every matched URL value in one pass.
# Matches that contain no space come back unchanged from str.replace.
file_content = re.sub(
    url_attribute_pattern,
    lambda match: match.group(0).replace(" ", "%20"),
    file_content,
)

# Persist the patched document next to the original so it can be parsed.
with open(f"{cleaned_filename}.rdf", "w", encoding="utf-8") as file:
    file.write(file_content)

In [20]:
# Parse the cleaned RDF/XML file into a fresh graph ...
graph = Graph()
with open(f"{cleaned_filename}.rdf", encoding="utf-8") as cleaned_rdf:
    graph.parse(cleaned_rdf, format="xml")

# ... then expand the graph in place using the OWL-RL semantics from owlrl.
owlrl_closure = DeductiveClosure(OWLRL_Semantics)
owlrl_closure.expand(graph)

In [21]:
def _subject_to_object(predicate_fragment):
    """Map subject -> object for triples whose predicate has the given fragment.

    Scans every triple in `graph`. When a subject occurs in several matching
    triples, the pair seen last during iteration is the one that is kept.
    """
    return {
        subject: value
        for subject, predicate, value in graph.triples((None, None, None))
        if predicate.fragment == predicate_fragment
    }


# metaphor -> frame lookups, one per frame role.
metaphor_to_source_frame_map = _subject_to_object("hasSourceFrame")
metaphor_to_target_frame_map = _subject_to_object("hasTargetFrame")

In [22]:
# subject -> name object, taken from every triple whose predicate fragment is
# "hasName". A subject with several such triples keeps the last one iterated.
names = {
    subject: name
    for subject, predicate, name in graph.triples((None, None, None))
    if predicate.fragment == "hasName"
}

In [23]:
# Subjects of every triple whose predicate fragment is "type" and whose object
# fragment is "Metaphor". Both terms are checked to be URIRefs before their
# `fragment` is read.
metaphors = {
    subject
    for subject, predicate, value in graph.triples((None, None, None))
    if isinstance(value, rdflib.term.URIRef)
    and isinstance(predicate, rdflib.term.URIRef)
    and value.fragment == "Metaphor"
    and predicate.fragment == "type"
}

In [24]:
def get_source_frame(metaphor: rdflib.term.URIRef):
    """Return the name stored for the metaphor's source frame.

    Looks the metaphor up in `metaphor_to_source_frame_map` and then resolves
    the frame through `names`. Returns None when the metaphor has no (truthy)
    source frame or when that frame has no entry in `names`.
    """
    frame = metaphor_to_source_frame_map.get(metaphor)
    if not frame:
        return None
    return names.get(frame)

def get_target_frame(metaphor: rdflib.term.URIRef):
    """Return the name stored for the metaphor's target frame.

    Looks the metaphor up in `metaphor_to_target_frame_map` and then resolves
    the frame through `names`. Returns None when the metaphor has no (truthy)
    target frame or when that frame has no entry in `names`.
    """
    frame = metaphor_to_target_frame_map.get(metaphor)
    if not frame:
        return None
    return names.get(frame)

In [25]:
# One (metaphor name, source frame name, target frame name) row per metaphor.
metaphor_rows = [
    (names.get(metaphor), get_source_frame(metaphor), get_target_frame(metaphor))
    for metaphor in metaphors
]

# Index the table by metaphor name and order it by that name.
metaphors_df = (
    pd.DataFrame(
        data=metaphor_rows,
        columns=["metaphor", "source_frame", "target_frame"],
    )
    .set_index("metaphor")
    .sort_values(by="metaphor")
)

metaphors_df

Unnamed: 0_level_0,source_frame,target_frame
metaphor,Unnamed: 1_level_1,Unnamed: 2_level_1
A SEQUENCE OF ACTIONS IS A SEQUENCE OF MOVEMENTS,Motion along a path,Action
A SEQUENCE OF VIOLENT EVENTS IS A FLUID,Fluid motion,Violence
ABILITY TO ACT IS ABILITY TO MOVE,Self propelled motion,Ability to act
ABILITY TO EVALUATE GOVERNMENT IS ABILITY TO SEE,Seeing,Citizen evaluation of government
ABILITY TO EVALUATE IS ABILITY TO SEE,Seeing,Evaluation
...,...,...
WINNING AN ELECTION IS WINNING A BOXING MATCH,Physical competition,Election
WINNING AN ELECTION IS WINNING A RACE,Race,Election
WINNING AN ELECTION IS WINNING A WAR,War,Election
WORDS ARE CONTAINERS,Containing,Word


In [None]:
# Export the metaphor table to a CSV file in the working directory.
metaphors_df.to_csv("./meta-net_metaphor_list.csv")

In [27]:
# Distinct names of every frame that is used as a source frame.
# `names.get` returns None for a frame without a "hasName" entry; such frames
# are skipped instead of crashing on `None.value`.
source_frames_set = {
    name.value
    for name in map(names.get, metaphor_to_source_frame_map.values())
    if name is not None
}

# One-column table turned into a sorted index of frame names.
source_frames_df = (
    pd.DataFrame(list(source_frames_set), columns=["source_frame"])
    .set_index("source_frame")
    .sort_index()
)

source_frames_df

?
Access to an object
Actor
Addiction
Adoption
...
Work scenario
Writing
Zealotry
minor factions
physical integrity


In [None]:
# Write the source-frame names without a header row.
# `header=False` is the documented way to suppress the header; `None` only
# worked because it happens to be falsy.
source_frames_df.to_csv("./meta-net_source_frames_list.csv", header=False)

In [29]:
# Distinct names of every frame that is used as a target frame.
# `names.get` returns None for a frame without a "hasName" entry; such frames
# are skipped instead of crashing on `None.value`.
target_frames_set = {
    name.value
    for name in map(names.get, metaphor_to_target_frame_map.values())
    if name is not None
}

# One-column table turned into a sorted index of frame names.
target_frames_df = (
    pd.DataFrame(list(target_frames_set), columns=["target_frame"])
    .set_index("target_frame")
    .sort_index()
)

target_frames_df

?
Ability to act
Abstract relatedness
Accepting
Access to education
...
Well-being
Word
agreement
balkan states
government power


In [None]:
# Write the target-frame names without a header row.
# `header=False` is the documented way to suppress the header; `None` only
# worked because it happens to be falsy.
target_frames_df.to_csv("./meta-net_target_frames_list.csv", header=False)

In [31]:
# Every frame name that appears as a source frame, a target frame, or both.
all_frames_set = source_frames_set.union(target_frames_set)

# One-column table turned into a sorted index of frame names.
all_frames_df = (
    pd.DataFrame(all_frames_set, columns=["frame"])
    .set_index("frame")
    .sort_index()
)

all_frames_df

?
Ability to act
Abstract relatedness
Accepting
Access to an object
...
agreement
balkan states
government power
minor factions
physical integrity


In [None]:
# Write the combined frame names without a header row.
# `header=False` is the documented way to suppress the header; `None` only
# worked because it happens to be falsy.
all_frames_df.to_csv("./meta-net_all_frames_list.csv", header=False)