# Event Extraction

### Load and Understand Data

In [3]:
import os
import re
import json
import numpy as np
import pandas as pd

from tqdm import tqdm
from transformers import BertTokenizer
from nltk.tokenize import sent_tokenize

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load news data
data_path = "../data/data.csv"
df = pd.read_csv(data_path)

print(f"Data shape: {df.shape}")
df.head()

Data shape: (7674, 7)


Unnamed: 0,Title,Source,Author,Published Date,URL,Country,Content
0,Fed officials warn of inflation risks from tar...,Straits Times,,2025-02-03T23:25:00Z,https://www.straitstimes.com/business/economy/...,Singapore,WASHINGTON - The Trump administration’s plan f...
1,While You Were Sleeping: 5 stories you might h...,Straits Times,,2025-02-03T22:35:49Z,https://www.straitstimes.com/world/while-you-w...,Singapore,"Trump, Sheinbaum reach deal to delay tariffs f..."
2,"Slower growth, souring business sentiment: How...",Channel NewsAsia,Abigail Ng,2025-02-03T22:00:00Z,https://www.channelnewsasia.com/singapore/trum...,Singapore,Analysts say the impact would be cushioned by ...
3,Malaysia's ECRL: A closer look at the US$11.2b...,Channel NewsAsia,Aqil Haziq Mahmud,2025-02-03T22:00:00Z,https://www.channelnewsasia.com/asia/malaysia-...,Singapore,In the second of a four-part series on the Eas...
4,"S&P 500, Nasdaq, pare losses as Trump’s Mexico...",Straits Times,,2025-02-03T21:22:52Z,https://www.straitstimes.com/business/companie...,Singapore,NEW YORK - The major stock indexes closed lowe...


### Preprocess the Data into OneIE Format

In [5]:
def preprocess_text(text):
    """Cleans and normalizes text"""
    text = text.replace("\n", " ").strip()
    return text

def tokenize_text(sentence, tokenizer):
    """Tokenizes text using WordPiece Tokenizer (BERT)"""
    tokens = sentence.split()  # Simple whitespace tokenization
    pieces = [tokenizer.tokenize(t) for t in tokens]
    token_lens = [len(p) for p in pieces]
    
    # Flatten pieces list
    flat_pieces = [p for sublist in pieces for p in sublist]
    
    return tokens, flat_pieces, token_lens

def process_news(df, output_path, tokenizer):
    """Convert news dataset to OneIE JSON"""
    print(f"Processing Documents...")
    doc_id_counter = 0

    with open(output_path, "w", encoding="utf-8") as w:
        for _, row in tqdm(df.iterrows(), total=len(df)):
            content = preprocess_text(row["Content"])
            sentences = sent_tokenize(content)

            for sent_id, sentence in enumerate(sentences):
                tokens, pieces, token_lens = tokenize_text(sentence, tokenizer)

                sent_obj = {
                    "doc_id": f"news_{doc_id_counter}",
                    "sent_id": sent_id, 
                    "sentence": sentence, 
                    "tokens": tokens,
                    "pieces": pieces,
                    "token_lens": token_lens,
                    "entity_mentions": [],
                    "relation_mentions": [],
                    "event_mentions": []
                }
                w.write(json.dumps(sent_obj) + "\n")

            doc_id_counter += 1

    print(f"Processing complete! Saved to {output_path}.")

In [6]:
# Load tokenizer
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

output_path = "../data/news_oneie.json"

test_df = df.head(100)
process_news(test_df, output_path, bert_tokenizer)

Processing Documents...


100%|██████████| 100/100 [00:05<00:00, 18.86it/s]

Processing complete! Saved to ../data/news_oneie.json.





### Convert OneIE format to AMR-IE format

In [7]:
from amr_ie.transform_for_amr import transform_dataset
from transformers import RobertaTokenizerFast

input_path = "../data/news_oneie.json"
output_path = "../data/news_amrie.json"

tokenizer = RobertaTokenizerFast.from_pretrained("roberta-large")
transform_dataset(input_path, output_path, tokenizer)

In [9]:
import json
import nltk
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-large")

def transform_for_amr(input_json_path, output_json_path, tokenizer):
    """Transforms OneIE Json format into AMR-compatible JSON format"""
    transformed_data = []

    with open(input_json_path, "r", encoding="utf-8") as f:
        for line in f:
            instance = json.loads(line.strip())

            # Extract original sentence
            sentence = instance["sentence"]
            # Tokenize sentence into words (NLTK)
            word_tokens = nltk.word_tokenize(sentence)
            # Subword tokenization 
            subword_pieces = [tokenizer.tokenize(token) for token in word_tokens]
            token_lens = [len(pieces) for pieces in subword_pieces]

            # Flatten subword pieces into single list
            flat_pieces = [piece for pieces in subword_pieces for piece in pieces]

            # Encode subword pieces into IDs
            piece_ids = tokenizer.convert_tokens_to_ids(flat_pieces)

            assert sum(token_lens) == len(flat_pieces), f"Tokenization mismatch in {instance['sent_id']}"

            # Fix entity mentions offsets
            fixed_entities = []
            for entity in instance["entity_mentions"]:
                start_idx, end_idx = entity["start"], entity["end"]
                fixed_entities.append({
                    "id": entity["id"],
                    "text": entity["text"],
                    "entity_type": entity["entity_type"],
                    "mention_type": entity["mention_type"],
                    "start": start_idx,
                    "end": end_idx
                })
            # Fix relation mentions
            fixed_relations = []
            for relation in instance["relation_mentions"]:
                fixed_relations.append({
                    "relation_type": relation["relation_type"],
                    "arguments": relation["arguments"]  # Ensure the relation arguments remain consistent
                })

            # Fix event mentions
            fixed_events = []
            for event in instance["event_mentions"]:
                trigger_start, trigger_end = event["trigger"]["start"], event["trigger"]["end"]
                fixed_events.append({
                    "event_type": event["event_type"],
                    "trigger": {
                        "text": event["trigger"]["text"],
                        "start": trigger_start,
                        "end": trigger_end
                    },
                    "arguments": event["arguments"]
                })
            
            # Create transformed instance
            transformed_instance = {
                "doc_id": instance["doc_id"],
                "sent_id": instance["sent_id"],
                "sentence": sentence,
                "tokens": word_tokens,  # Original word-level tokens
                "pieces": flat_pieces,  # Subword tokenized pieces
                "piece_ids": piece_ids,
                "token_lens": token_lens,  # Length of subword pieces per token
                "entity_mentions": fixed_entities,
                "relation_mentions": fixed_relations,
                "event_mentions": fixed_events
            }

            transformed_data.append(transformed_instance)

    # Save json file
    with open(output_json_path, "w", encoding="utf-8") as f_out:
        for entry in transformed_data:
            f_out.write(json.dumps(entry, ensure_ascii=False) + "\n")

    print(f"Transformed data saved to: {output_json_path}")

In [10]:
# Transform data
transform_for_amr(
    input_json_path="../data/news_oneie.json",
    output_json_path="../data/news_amrie.json",
    tokenizer=tokenizer
)

Transformed data saved to: ../data/news_amrie.json


### Generate AMR Graphs

In [3]:
import json
import nltk
from nltk.tokenize import word_tokenize
from transformers import RobertaTokenizer

# Ensure you have downloaded the tokenizer
nltk.download('punkt')

# Initialize RoBERTa tokenizer
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# File paths
input_json = "../data/news_amrie.json"
output_json = "../data/news_amrie_fixed.json"

def process_json(input_path, output_path):
    processed_data = []
    
    with open(input_path, "r", encoding="utf-8") as f:
        for line in f:
            data = json.loads(line.strip())

            # Tokenize sentence using NLTK for "tokens"
            tokens = word_tokenize(data["sentence"])

            # Tokenize using RoBERTa for "pieces"
            tokenized = tokenizer(data["sentence"], add_special_tokens=False)
            pieces = tokenizer.convert_ids_to_tokens(tokenized["input_ids"])

            # Compute "token_lens" (mapping word -> subword splits)
            token_lens = []
            piece_idx = 0

            for word in tokens:
                word_pieces = tokenizer.tokenize(word)
                token_lens.append(len(word_pieces))
                piece_idx += len(word_pieces)

            # Update JSON entry
            data.update({
                "tokens": tokens,
                "pieces": pieces,
                "token_lens": token_lens
            })

            processed_data.append(data)

    # Save the fixed data
    with open(output_path, "w", encoding="utf-8") as f:
        for entry in processed_data:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")

    print(f"✅ Processed data saved to {output_path}")

# Run the script
process_json(input_json, output_json)


[nltk_data] Downloading package punkt to /Users/clareyeo/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Downloading: 100%|██████████| 899k/899k [00:00<00:00, 1.58MB/s]
Downloading: 100%|██████████| 456k/456k [00:00<00:00, 15.9MB/s]


✅ Processed data saved to ../data/news_amrie_fixed.json


In [11]:
from amr_ie.process_amr import get_amr_data

get_amr_data(
    json_path="../data/news_amrie.json",
    graph_pkl_path="../data/news_graphs.pkl",
    amr_path="../data/news_amrs.pkl",
    checkpoint_dir="../models/amr_general/checkpoint_best.pt"
)

Using backend: pytorch


using CPU for models


Using cache found in /Users/clareyeo/.cache/torch/hub/pytorch_fairseq_master


running build_ext




copying build/lib.macosx-10.7-x86_64-3.6/fairseq/libbleu.cpython-36m-darwin.so -> fairseq
copying build/lib.macosx-10.7-x86_64-3.6/fairseq/data/data_utils_fast.cpython-36m-darwin.so -> fairseq/data
copying build/lib.macosx-10.7-x86_64-3.6/fairseq/data/token_block_utils_fast.cpython-36m-darwin.so -> fairseq/data
copying build/lib.macosx-10.7-x86_64-3.6/fairseq/libbase.cpython-36m-darwin.so -> fairseq
copying build/lib.macosx-10.7-x86_64-3.6/fairseq/libnat.cpython-36m-darwin.so -> fairseq
copying build/lib.macosx-10.7-x86_64-3.6/alignment_train_cpu_binding.cpython-36m-darwin.so -> 
loading archive file http://dl.fbaipublicfiles.com/fairseq/models/roberta.large.tar.gz from cache at /Users/clareyeo/.cache/torch/pytorch_fairseq/83e3a689e28e5e4696ecb0bbb05a77355444a5c8a3437e0f736d8a564e80035e.c687083d14776c1979f3f71654febb42f2bb3d9a94ff7ebdfe1ac6748dba89d2
| dictionary: 50264 types
Finished loading models
Running on batch size: 128


roberta: 100%|██████████| 255/255 [13:54<00:00,  3.27s/it]


2548


decoding:   5%|▌         | 1/20 [08:34<2:42:55, 514.49s/it]

Foreign workers work seven days a week for lower salaries , while local workers might ask for multiple days off in a week and sometimes clash with their Chinese supervisors , he said . <ROOT>


decoding:  10%|█         | 2/20 [13:22<1:54:19, 381.09s/it]

I 'm not sure going through Pahang and the east coast of Malaya would have the same sort of so-called economic multiplier effect , or if the passenger as well as the cargo traffic would eventually be enough to sort of make it profitable. ” Ragu Sampasivam , the chief operating officer of the East Coast Economic Region Development Council who was involved in the initial feasibility studies for the ECRL , told CNA that he remains confident about the freight projections . <ROOT>
The large loan given for this BRI project has raised concerns from some observers about a Chinese debt trap , while a lack of transparency on the latest loan agreement means it remains unclear how the updated project cost has impacted repayment terms , observers have said . <ROOT>
Nvidia and a gauge of semiconductor stocks fell . <ROOT>


decoding:  15%|█▌        | 3/20 [17:18<1:29:15, 315.05s/it]

UNITED NATIONS - A United Nations Security Council meeting in two weeks could be a “ very good opportunity ” for Chinese Foreign Minister Wang Yi and US Secretary of State Marco Rubio to meet , China ’ s UN envoy said on Feb 3 , stressing the need for cooperation because “ so much is at stake ” , while blasting Washington on tariffs . <ROOT>
After talks with Rubio , Mulino signaled a willingness to review a key 25-year concession to Hong Kong-based CK Hutchison Holdings , renewed in 2021 for the operation of ports at both entrances of the canal , pending the results of an audit . <ROOT>


decoding:  20%|██        | 4/20 [20:36<1:11:40, 268.81s/it]

“ We started to ask why there isn ’ t a solution that enables proactive detection within the data storage hardware , ” says Ms Chan . <ROOT>


decoding:  25%|██▌       | 5/20 [22:27<52:57, 211.87s/it]  

LONDON : Stock markets tumbled while the dollar rallied and oil prices rose on Monday ( Feb 3 ) over concerns about the global economy after United States President Donald Trump launched trade wars with Canada , China and Mexico . <ROOT>
With Canada and Mexico the top sources of US crude oil imports , US oil prices jumped more than 1 per cent , while gasoline futures rose nearly 3 per cent . <ROOT>
With Canada and Mexico the top sources of US crude oil imports , US oil prices jumped more than 1 per cent , while gasoline futures rose nearly 3 per cent . <ROOT>


decoding:  30%|███       | 6/20 [26:52<53:40, 230.02s/it]

Racial and religious harmony scores rise in S ’ pore : IPS study Researchers polled a representative sample of 4,000 Singaporeans and permanent residents aged 18 and above . <ROOT>


decoding:  40%|████      | 8/20 [33:20<40:56, 204.73s/it]

With Canada and Mexico the top sources of US crude oil imports , US oil prices jumped more than US $ 1 , while gasoline futures rose 3 per cent . <ROOT>


decoding:  45%|████▌     | 9/20 [36:22<36:14, 197.72s/it]

With Canada and Mexico the top sources of U.S. crude oil imports , U.S. oil prices jumped more than $ 1 , while gasoline futures rose 3 % . <ROOT>


decoding:  50%|█████     | 10/20 [38:05<28:03, 168.37s/it]

“ However , if it drags out and pushes prices higher , there will be serious questions about why we can ’ t solve the border and drug challenges with levers other than tariffs , ” she said . <ROOT>


decoding:  55%|█████▌    | 11/20 [40:25<23:56, 159.65s/it]

The President has cited illegal immigration and the trafficking of the deadly opioid fentanyl as reasons for the “ emergency ” measures . <ROOT>


decoding:  60%|██████    | 12/20 [53:09<45:49, 343.68s/it]

South Africa had previously made a request last year for what is called the Taipei Liaison Office to leave Pretoria . <ROOT>
South Africa had previously made a request last year for what is called the Taipei Liaison Office to leave Pretoria . <ROOT>
South Africa had previously made a request last year for what is called the Taipei Liaison Office to leave Pretoria . <ROOT>


decoding:  65%|██████▌   | 13/20 [57:55<38:02, 326.01s/it]

“ We ’ re talking about thousands and thousands of jobs being lost , ” said John D ’ Agnolo , the president of a local union representing workers at Ford Motor ’ s engine plant in Windsor . <ROOT>
The move provoked immediate vows of retaliation , while analysts warned that the ensuing trade war would likely decrease US growth and raise consumer prices over the short term . <ROOT>
CNA also spoke to residents and business owners on how they think the ECRL might change their neighbourhoods . <ROOT>


decoding:  70%|███████   | 14/20 [1:02:43<31:27, 314.63s/it]

PANAMA CITY : United States Secretary of State Marco Rubio on Sunday ( Feb 2 ) warned Panama 's President Jose Raul Mulino that Washington will `` take measures necessary '' if Panama does not immediately take steps to end what President Donald Trump sees as China 's influence and control over the Panama Canal . <ROOT>
Mulino also showed willingness to review some Chinese businesses in Panama , including a key 25-year concession to Hong Kong-based CK Hutchison Holdings , renewed in 2021 for the operation of ports at both entrances of the canal , pending the results of an audit . <ROOT>


decoding:  75%|███████▌  | 15/20 [1:05:39<22:44, 272.80s/it]

The move provoked immediate vows of retaliation from all three countries , while analysts warned that the ensuing trade war would likely slow US growth and raise consumer prices over the short term . <ROOT>
The potential to drive up consumer prices is a particularly sensitive area for investors , who are worried about a revival in inflation causing the Federal Reserve to stop cutting rates . <ROOT>
The potential to drive up consumer prices is a particularly sensitive area for investors , who are worried about a revival in inflation causing the Federal Reserve to stop cutting rates . <ROOT>
Once the Fed resumes cuts , the 10-year yield should drift towards 4 per cent by the year end . <ROOT>


decoding:  80%|████████  | 16/20 [1:08:47<16:29, 247.47s/it]

Mr Bo Zhengyuan , a partner at Plenum research consultancy in Shanghai , said that China is biding its time to observe how brutal the US ’ s trade fight will be with Mexico , Canada and the EU . <ROOT>
Just as Mr Rubio started his trip , Mr Trump signed off on sanctions on the top three US trading partners – Canada , Mexico and China – and told Canada it should be the 51st US state . <ROOT>
The new administration sees foreign aid as charity and does not consider how it serves US geopolitical interests , said a diplomat from a G-7 country who declined to be identified to discuss sensitive issues . <ROOT>
The move provoked immediate vows of retaliation , while analysts warned the ensuing trade war would likely decrease US growth and raise consumer prices over the short term . <ROOT>
“ Then China can plan accordingly as to how it wants to play its cards , ” he said . <ROOT>


decoding:  85%|████████▌ | 17/20 [1:14:52<14:08, 282.78s/it]

[93mmachine 4 not closed at step 790[0m
“ These are volatile situations , so we ’ ll see how long it lasts and what happens , ” CFO Rick Dierker said in an earnings call on Jan 31 , adding that they have the ability to “ be reactive when we need to be. ” REUTERS More on this Topic Join ST 's Telegram channel and get the latest breaking news delivered to you . <ROOT>
The German carmaker is a tiny player in India ’ s 4 million units a year car market , the world ’ s third biggest , where its Audi brand also lags competitors in the luxury segment like Mercedes and BMW . <ROOT>
“ As long as these universal tariffs are in place , Americans will be forced to pay higher prices on everyday consumer goods , ” said Mr David French , NRF executive vice-president of government relations . <ROOT>
The tax notice “ deals a body blow ” to the much-advertised “ policy of ease of doing business in India for foreign investors , ” the company said . <ROOT>


decoding:  90%|█████████ | 18/20 [1:17:36<08:14, 247.15s/it]

After long , fraught negotiations the two sides agreed what became known as the `` phase one '' trade deal – a ceasefire in the nearly two-year-old trade war . <ROOT>


decoding:  95%|█████████▌| 19/20 [1:23:23<04:37, 277.04s/it]

According to economists at S & P Global , of the imports coming into the US from Canada and Mexico , more than 18 per cent of their value was created in the US , before being sent to those countries . <ROOT>
Large loophole The smaller-value shipments account for more than a tenth of China ’ s exports to the US , according to research from economists at Nomura Holdings Inc . <ROOT>


decoding: 100%|██████████| 20/20 [1:26:01<00:00, 258.08s/it]

In her post , Sheinbaum also rejected as `` slander '' the allegation by the White House that drug cartels have an alliance with the Mexican government , a point used by the administration of Trump to explain why it had imposed the tariffs . <ROOT>
The tariffs on the two biggest sources of US crude imports will raise costs for the heavier crude grades US refineries need for optimum production , industry sources said , cutting their profitability and potentially forcing production cuts . <ROOT>





### Load Model

In [12]:
import os
import json
import torch
from tqdm import tqdm

from torch.utils.data import DataLoader
from transformers import BertTokenizer, RobertaTokenizer

from amr_ie.model import OneIE
from amr_ie.config import Config
from amr_ie.data import IEDataset, Batch
from amr_ie.convert import json_to_cs

In [13]:
# Configuration
MODEL_PATH = "../models/amr_general/best.role.mdl"
DEVICE = torch.device("cpu")
BERT_MODEL = "roberta-large"
CONFIG_PATH = "../models/amr_general/config/ace05.json"

def load_amr_graphs(graph_path):
    """Loads AMR graphs, alignments, and existence dictionaries from a .pkl file."""
    with open(graph_path, "rb") as f:
        graphs, alignments, exist = torch.load(f)
    return graphs, alignments, exist

def load_config(config_path):
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    return config

def load_model(model_path, bert_model, device=0, gpu=False, beam_size=5):
    print(f"Loading the model from {model_path}")
    map_location = f"cuda:{device}" if gpu else "cpu"
    state = torch.load(model_path, map_location=map_location)

    config = state["config"]
    config["gpu_device"] = "cpu"
    if isinstance(config, dict):
        config = Config.from_dict(config)

    config.bert_cache_dir = os.path.join(os.getcwd(), "bert")
    vocabs = state["vocabs"]
    valid_patterns = state["valid"]

    model = OneIE(config, vocabs, valid_patterns)
    model.load_bert(bert_model)
    model.load_state_dict(state["model"])
    model.beam_size = beam_size

    if gpu:
        model.cuda(device)

    tokenizer = RobertaTokenizer.from_pretrained(config.bert_model_name, do_lower_case=False)
    
    return model, tokenizer, config, vocabs

# Load model
ace_config = load_config(CONFIG_PATH)
model, tokenizer, config, vocabs = load_model(MODEL_PATH, BERT_MODEL)

Loading the model from ../models/amr_general/best.role.mdl
number of global features: 8838
Loading pre-trained BERT model roberta-large


In [14]:
# Load dataset
amr_graphs, alignments, exist = load_amr_graphs(graph_path="../data/news_graphs.pkl")

dataset = IEDataset(
    path="../data/news_amrie.json",
    graph_list=amr_graphs,
    align_list=alignments,
    exist_list=exist,
    max_length=128, 
    gpu=False
)
dataset.numberize(tokenizer, vocabs)
dataloader = DataLoader(dataset, batch_size=20, shuffle=False, collate_fn=dataset.collate_fn)

Discarded 4 overlength instances
Loaded 2544 instances from ../data/news_amrie.json


In [15]:
def extract_events_and_arguments(model, dataloader, config, gpu=False):
    """Extracts events and arguments from the news dataset using OneIE."""
    extracted_results = []

    for batch in tqdm(dataloader, desc="Extracting events and arguments"):
        with torch.no_grad():
            batch = Batch(**{k: v.to("cuda") if isinstance(v, torch.Tensor) and gpu else v for k, v in batch._asdict().items()})
            graphs = model.predict(batch, epoch=1)  # Get predicted event graphs
        
        for sent_id, tokens, graph in zip(batch.sent_ids, batch.tokens, graphs):
            graph.clean(relation_directional=config.relation_directional, 
                        symmetric_relations=config.symmetric_relations)  # Ensure the extracted graph is processed correctly
            
            # Extracting mentions directly from the Graph object
            extracted_results.append({
                "sent_id": sent_id,
                "tokens": tokens,
                "events": [(t[0], t[1], t[2]) for t in graph.triggers],  # (start, end, event_type)
                "entities": [(e[0], e[1], e[2]) for e in graph.entities],  # (start, end, entity_type)
                "relations": [(r[0], r[1], r[2]) for r in graph.relations],  # (entity_1, entity_2, relation_type)
                "roles": [(r[0], r[1], r[2]) for r in graph.roles],  # (event_index, entity_index, role_type)
            })

    return extracted_results

In [16]:
extracted_data = extract_events_and_arguments(model, dataloader, config, gpu=False)

Extracting events and arguments: 100%|██████████| 128/128 [53:24<00:00, 25.03s/it] 


In [19]:
def print_extracted_results(extracted_results, vocabs):
    """
    Prints the extracted entities, relations, roles, and triggers with their actual labels.
    :param extracted_results: List of extracted event/argument mentions from OneIE.
    :param vocabs: Dictionary containing label mappings for entity types, event types, relations, and roles.
    """
    entity_type_itos = {v: k for k, v in vocabs["entity_type"].items()}  # Map index to entity label
    event_type_itos = {v: k for k, v in vocabs["event_type"].items()}  # Map index to event label
    relation_type_itos = {v: k for k, v in vocabs["relation_type"].items()}  # Map index to relation label
    role_type_itos = {v: k for k, v in vocabs["role_type"].items()}  # Map index to role label

    for result in extracted_results:
        if not (result["entities"] and result["events"] and result["relations"] and result["roles"]):
            continue
        
        print(f"\n📰 **Sentence ID: {result['sent_id']}**")
        print("📌 **Tokens:**", " ".join(result["tokens"]))

        print("\n🔹 **Entities:**")
        if result["entities"]:
            for start, end, entity_type in result["entities"]:
                entity_label = entity_type_itos.get(entity_type, "UNKNOWN")
                entity_text = " ".join(result["tokens"][start:end])
                print(f"  - {entity_text} ({entity_label}) [Pos: {start}-{end}]")
        else:
            print("  - No entities found.")

        print("\n🔥 **Events (Triggers):**")
        if result["events"]:
            for start, end, event_type in result["events"]:
                event_label = event_type_itos.get(event_type, "UNKNOWN")
                trigger_text = " ".join(result["tokens"][start:end])
                print(f"  - {trigger_text} ({event_label}) [Pos: {start}-{end}]")
        else:
            print("  - No events found.")

        print("\n🔗 **Relations:**")
        if result["relations"]:
            for ent1, ent2, relation_type in result["relations"]:
                relation_label = relation_type_itos.get(relation_type, "UNKNOWN")
                entity_1_text = "".join(result["tokens"][ent1])
                entity_2_text = "".join(result["tokens"][ent2])
                print(f"  - {entity_1_text} --[{relation_label}]--> {entity_2_text}")
        else:
            print("  - No relations found.")

        print("\n🎭 **Roles:**")
        if result["roles"]:
            for event_idx, entity_idx, role_type in result["roles"]:
                role_label = role_type_itos.get(role_type, "UNKNOWN")
                event_text = " ".join(result["tokens"][result['events'][event_idx][0]:result['events'][event_idx][1]])
                entity_text = " ".join(result["tokens"][result['entities'][entity_idx][0]:result['entities'][entity_idx][1]])
                print(f"  - {entity_text} plays role '{role_label}' in event '{event_text}'")
        else:
            print("  - No roles found.")
        
        print("\n" + "=" * 80)  # Separator for better readability

In [20]:
print_extracted_results(extracted_data, vocabs)


📰 **Sentence ID: 8**
📌 **Tokens:** On Feb 3 , Mr Trump said he was suspending the tariffs on Mexico for a month after President Claudia Sheinbaum agreed to send soldiers to the US-Mexican border to curb drug trafficking .

🔹 **Entities:**
  - President Claudia Sheinbaum (PER) [Pos: 18-21]
  - border (LOC) [Pos: 28-29]
  - Trump (PER) [Pos: 5-6]
  - Mexico (GPE) [Pos: 13-14]
  - soldiers (PER) [Pos: 24-25]
  - Mr (PER) [Pos: 4-5]
  - US-Mexican (GPE) [Pos: 27-28]

🔥 **Events (Triggers):**
  - send (Movement:Transport) [Pos: 23-24]

🔗 **Relations:**
  - Feb --[PHYS]--> Mr
  - Feb --[PART-WHOLE]--> said

🎭 **Roles:**
  - border plays role 'Destination' in event 'send'


📰 **Sentence ID: 12**
📌 **Tokens:** READ MORE HERE US military flight deporting migrants to India , says official A US military plane is deporting migrants to India , a US official said on Feb 3 , the farthest destination of the Trump administration ’ s military transport flights for migrants .

🔹 **Entities:**
  - offici