In [1]:
from transformers import AutoTokenizer, pipeline
from datasets import load_dataset
import pandas as pd
import itertools
from difflib import SequenceMatcher
import json
import numpy as np

import importlib
import _RE
importlib.reload(_RE)
from _RE import join_text, merge_result, combine_entities

In [2]:
def get_docred_data(get_distant=False):
    '''
    Load the 'docred' dataset and return its splits as pandas DataFrames.

    Args:
        get_distant (bool): When truthy, the 'train_distant' split is converted as well;
            otherwise that slot of the result is None.

    Returns:
        tuple: (train_annotated, train_distant, test, validation).
    '''
    splits = load_dataset('docred', trust_remote_code=True)

    def as_frame(split_name):
        # Each split is materialised as its own DataFrame.
        return pd.DataFrame(splits[split_name])

    annotated_df = as_frame('train_annotated')
    distant_df = as_frame('train_distant') if get_distant else None
    test_df = as_frame('test')
    validation_df = as_frame('validation')

    return annotated_df, distant_df, test_df, validation_df

In [6]:
# Load the DocRED splits; the second slot (distant supervision) is discarded here.
train, _, test, validation = get_docred_data()

In [3]:
def get_info(instance):
    '''
    Unpack one dataset record into sentences, vertex set and labels.

    Args:
        instance: Mapping-like record exposing 'sents' (a list of token lists),
            'vertexSet' and 'labels'.

    Returns:
        tuple: (sents, vertexSet, labels), where sents holds one space-joined
            string per token list and the other two are returned untouched.
    '''
    sents = list(map(' '.join, instance['sents']))
    return sents, instance['vertexSet'], instance['labels']

In [20]:
# Use the first row of the annotated training split as the running example.
sents, vertexSet, labels = get_info(train.iloc[0])

In [21]:
# Print each entity index next to its list of mentions.
for i, e in enumerate(vertexSet):
    print(i, e)

0 [{'name': 'Zest Airways, Inc.', 'sent_id': 0, 'pos': [0, 4], 'type': 'ORG'}, {'name': 'Asian Spirit and Zest Air', 'sent_id': 0, 'pos': [10, 15], 'type': 'ORG'}, {'name': 'AirAsia Zest', 'sent_id': 0, 'pos': [6, 8], 'type': 'ORG'}, {'name': 'AirAsia Zest', 'sent_id': 6, 'pos': [19, 21], 'type': 'ORG'}]
1 [{'name': 'Ninoy Aquino International Airport', 'sent_id': 3, 'pos': [4, 8], 'type': 'LOC'}, {'name': 'Ninoy Aquino International Airport', 'sent_id': 0, 'pos': [26, 30], 'type': 'LOC'}]
2 [{'name': 'Pasay City', 'sent_id': 0, 'pos': [31, 33], 'type': 'LOC'}]
3 [{'name': 'Metro Manila', 'sent_id': 0, 'pos': [34, 36], 'type': 'LOC'}]
4 [{'name': 'Philippines', 'sent_id': 0, 'pos': [38, 39], 'type': 'LOC'}, {'name': 'Philippines', 'sent_id': 4, 'pos': [13, 14], 'type': 'LOC'}, {'name': 'Republic of the Philippines', 'sent_id': 5, 'pos': [25, 29], 'type': 'LOC'}]
5 [{'name': 'Manila', 'sent_id': 1, 'pos': [13, 14], 'type': 'LOC'}, {'name': 'Manila', 'sent_id': 3, 'pos': [9, 10], 'type':

In [18]:
# Print every labelled relation as head index / tail index / relation text.
for i in range(len(labels['head'])):
    print(f"head: {labels['head'][i]}; tail: {labels['tail'][i]}; relation: {labels['relation_text'][i]}")

head: 0; tail: 2; relation: headquarters location
head: 0; tail: 4; relation: country
head: 12; tail: 4; relation: country
head: 2; tail: 4; relation: country
head: 2; tail: 3; relation: located in the administrative territorial entity
head: 4; tail: 3; relation: contains administrative territorial entity
head: 5; tail: 4; relation: country
head: 3; tail: 2; relation: contains administrative territorial entity
head: 3; tail: 4; relation: located in the administrative territorial entity
head: 3; tail: 4; relation: country
head: 1; tail: 2; relation: located in the administrative territorial entity
head: 1; tail: 4; relation: country
head: 10; tail: 4; relation: country


In [34]:
def make_triplets(vertexSet, labels):
    '''
    Returns a list of triplets of the format <head, relation, tail>.

    `head` and `tail` each contain the list of "synonym" mention names of one
    entity (e.g. Swedish and Sweden); `relation` contains the relation_id and
    the relation_text (which explains the relation, e.g. "country").

    Args:
        vertexSet (list): One list of mention dicts per entity; only the 'name'
            key of each mention is read.
        labels (dict): Parallel sequences under 'head', 'tail', 'relation_id'
            and 'relation_text'. 'head' and 'tail' hold indices into vertexSet.

    Returns:
        list: [[head_names, [relation_id, relation_text], tail_names], ...] in
            label order. Every name list is a fresh copy, so editing one
            triplet never changes another triplet that refers to the same entity.

    Raises:
        ValueError: If the four label sequences do not have the same length.
    '''
    head = labels['head']
    tail = labels['tail']
    relation_ids = labels['relation_id']
    relation_texts = labels['relation_text']

    if not len(head) == len(tail) == len(relation_texts) == len(relation_ids):
        raise ValueError("Labels are not unform length")

    # Mention names per entity, indexed the same way as vertexSet.
    names = [[mention['name'] for mention in mentions] for mentions in vertexSet]

    # list(...) copies the name lists: several relations usually share an entity,
    # and handing out the same list object would alias them across triplets.
    return [
        [list(names[head_index]), [relation_id, relation_text], list(names[tail_index])]
        for head_index, tail_index, relation_id, relation_text
        in zip(head, tail, relation_ids, relation_texts)
    ]

In [35]:
# Build the gold triplets for the example document and print one per line.
triplets = make_triplets(vertexSet, labels)
for t in triplets:
    print(t)

[['Zest Airways, Inc.', 'Asian Spirit and Zest Air', 'AirAsia Zest', 'AirAsia Zest'], ['P159', 'headquarters location'], ['Pasay City']]
[['Zest Airways, Inc.', 'Asian Spirit and Zest Air', 'AirAsia Zest', 'AirAsia Zest'], ['P17', 'country'], ['Philippines', 'Philippines', 'Republic of the Philippines']]
[['Zest Air', 'Zest Air'], ['P17', 'country'], ['Philippines', 'Philippines', 'Republic of the Philippines']]
[['Pasay City'], ['P17', 'country'], ['Philippines', 'Philippines', 'Republic of the Philippines']]
[['Pasay City'], ['P131', 'located in the administrative territorial entity'], ['Metro Manila']]
[['Philippines', 'Philippines', 'Republic of the Philippines'], ['P150', 'contains administrative territorial entity'], ['Metro Manila']]
[['Manila', 'Manila'], ['P17', 'country'], ['Philippines', 'Philippines', 'Republic of the Philippines']]
[['Metro Manila'], ['P150', 'contains administrative territorial entity'], ['Pasay City']]
[['Metro Manila'], ['P131', 'located in the administ

In [36]:
# Print the example document one sentence per line.
for s in sents:
    print(s)

Zest Airways , Inc. operated as AirAsia Zest ( formerly Asian Spirit and Zest Air ) , was a low - cost airline based at the Ninoy Aquino International Airport in Pasay City , Metro Manila in the Philippines .
It operated scheduled domestic and international tourist services , mainly feeder services linking Manila and Cebu with 24 domestic destinations in support of the trunk route operations of other airlines .
In 2013 , the airline became an affiliate of Philippines AirAsia operating their brand separately .
Its main base was Ninoy Aquino International Airport , Manila .
The airline was founded as Asian Spirit , the first airline in the Philippines to be run as a cooperative .
On August 16 , 2013 , the Civil Aviation Authority of the Philippines ( CAAP ) , the regulating body of the Government of the Republic of the Philippines for civil aviation , suspended Zest Air flights until further notice because of safety issues .
Less than a year after AirAsia and Zest Air 's strategic allian

In [38]:
# NER checkpoint name; the same global is later handed to merge_result by extract_entities.
model_name = 'dslim/distilbert-NER'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Token-classification pipeline built from that checkpoint and tokenizer.
ner_pipeline = pipeline('ner', model=model_name, tokenizer=tokenizer)

In [94]:
def combine_entities(entities):
    '''
    Group word-level NER results into whole entities.

    A group opens at every item whose label starts with 'B-'. Following items
    are folded into it while one of these holds, checked in this order:
      1. the item's word is '-'                      -> appended without a space
      2. it is labelled 'I-<same type>' and starts
         exactly one character after the group ends  -> appended after a space
      3. the previous item's word is '-'             -> appended without a space
    Items outside any group (not starting with 'B-') are dropped.

    Args:
        entities (list): Dicts with 'entity', 'score', 'start', 'end', 'word'.

    Returns:
        list: New dicts with the same five keys; 'entity' loses its 'B-' prefix,
            'score' is the minimum over the group, 'end' is the last item's end.
    '''
    combined_entities = []
    total = len(entities)
    idx = 0
    while idx < total:
        head = entities[idx]
        if not head['entity'].startswith('B-'):
            idx += 1
            continue

        entity_type = head['entity'][2:]
        continuation_label = f'I-{entity_type}'
        span = {
            'entity': entity_type,
            'score': head['score'],
            'start': head['start'],
            'end': head['end'],
            'word': head['word'],
        }

        cursor = idx + 1
        while cursor < total:
            token = entities[cursor]
            # Only the joiner differs between the three accepting cases.
            if token['word'] == '-':
                joiner = ''
            elif token['entity'] == continuation_label and token['start'] == span['end'] + 1:
                joiner = ' '
            elif entities[cursor - 1]['word'] == '-':
                joiner = ''
            else:
                break
            span['word'] += joiner + token['word']
            span['end'] = token['end']
            span['score'] = min(span['score'], token['score'])
            cursor += 1

        combined_entities.append(span)
        # Resume scanning right after the last item absorbed into this group.
        idx = cursor
    return combined_entities

In [95]:
def merge_result(entities, model_name):
    '''
    Glue token-level NER results that touch each other back into words.

    A token is merged into the running one when
      * its word starts with '##' and it starts exactly where the running one
        ends (the '##' prefix is stripped), or
      * it starts exactly where the running one ends and both labels agree
        after their two-character prefix.
    The merged token keeps the first token's remaining fields, takes the last
    token's 'end' and the minimum 'score'.

    The input dicts are never modified: each running token is a copy, so the
    function can safely be called again on the same list.

    Args:
        entities (list): Dicts with at least 'entity', 'word', 'start', 'end', 'score'.
        model_name (str): Must be 'dslim/bert-base-NER' or 'dslim/distilbert-NER'.

    Returns:
        list: Merged token dicts in input order.

    Raises:
        ValueError: If model_name is not one of the supported models.
    '''
    if model_name not in ['dslim/bert-base-NER', 'dslim/distilbert-NER']:
        raise ValueError('NER model not compatible.')

    merged_entities = []
    current = None

    for entity in entities:
        if current is None:
            # Copy so the in-place updates below never leak into the caller's data.
            current = dict(entity)
        else:
            if entity['word'].startswith('##') and entity['start'] == current['end']:
                current['word'] += entity['word'][2:]
                current['end'] = entity['end']
                current['score'] = min(current['score'], entity['score'])
            elif entity['start'] == current['end'] and entity['entity'][2:] == current['entity'][2:]:
                current['word'] += entity['word']
                current['end'] = entity['end']
                current['score'] = min(current['score'], entity['score'])
            # NOTE(review): this only matches a token starting one character BEFORE
            # the running token ends, so it never fires on non-overlapping tokens.
            # It may have been meant as `entity['start'] == current['end'] + 1`;
            # left unchanged because that would alter which entities get merged.
            elif entity['start'] + 1 == current['end'] and entity['entity'][2:] == current['entity'][2:]:
                current['word'] += ' ' + entity['word']
                current['end'] = entity['end']
                current['score'] = min(current['score'], entity['score'])
            else:
                merged_entities.append(current)
                current = dict(entity)
    if current is not None:
        merged_entities.append(current)

    return merged_entities

In [174]:
def extract_entities(sents, ner_pipeline):
    '''
    Run NER on every sentence and post-process the raw output.

    Each sentence goes through ner_pipeline, then merge_result, then
    combine_entities. merge_result receives the notebook-level `model_name`
    global, so that name must be defined before this function is called.

    Args:
        sents (list): Sentences (strings) to process.
        ner_pipeline: Callable applied to each sentence.

    Returns:
        list: One list of combined entities per sentence, in sentence order.
    '''
    return [
        combine_entities(merge_result(ner_pipeline(sent), model_name))
        for sent in sents
    ]


In [175]:
# Extract entities for the example document and print one list per sentence.
entities = extract_entities(sents, ner_pipeline)
for e in entities:
    print(e)

[{'entity': 'ORG', 'score': 0.4994115, 'start': 0, 'end': 19, 'word': 'Zest Airways , Inc.'}, {'entity': 'ORG', 'score': 0.94523096, 'start': 32, 'end': 44, 'word': 'AirAsia Zest'}, {'entity': 'ORG', 'score': 0.7765905, 'start': 56, 'end': 72, 'word': 'Asian Spirit and'}, {'entity': 'ORG', 'score': 0.7527509, 'start': 73, 'end': 81, 'word': 'Zest Air'}, {'entity': 'LOC', 'score': 0.94873, 'start': 124, 'end': 158, 'word': 'Ninoy Aquino International Airport'}, {'entity': 'LOC', 'score': 0.9870161, 'start': 162, 'end': 172, 'word': 'Pasay City'}, {'entity': 'LOC', 'score': 0.9921524, 'start': 175, 'end': 187, 'word': 'Metro Manila'}, {'entity': 'LOC', 'score': 0.97528344, 'start': 195, 'end': 206, 'word': 'Philippines'}]
[{'entity': 'LOC', 'score': 0.998137, 'start': 99, 'end': 105, 'word': 'Manila'}, {'entity': 'LOC', 'score': 0.99781895, 'start': 110, 'end': 114, 'word': 'Cebu'}]
[{'entity': 'ORG', 'score': 0.97658587, 'start': 45, 'end': 64, 'word': 'Philippines AirAsia'}]
[{'entity'

In [142]:
def tag_sents(sents, entities):
    '''
    Tags sentences with <B-type> before and <E-type> after each identified entity.

    Every entity's [start, end) span in the sentence is replaced by
    '<B-type> ' + entity['word'] + ' <E-type>'.

    Args:
        sents (list): List of sentences (strings) to process for named entity tagging.
        entities (list): List of lists containing entities for each sentence. Each
            entity is a dict with 'start', 'end', 'word' and 'entity'. The running
            offset is only correct when the entities of a sentence are ordered by
            'start' and do not overlap.

    Returns:
        tuple: (tagged_sents, tagged_text), where tagged_sents is the list of
            tagged sentences and tagged_text is those sentences joined by ' [SEP] '.
    '''
    tagged_sents = []
    for i, sent in enumerate(sents):
        tagged_sent = sent
        # How far positions in tagged_sent have shifted relative to the original sentence.
        offset = 0
        for entity in entities[i]:
            start = entity['start'] + offset
            end = entity['end'] + offset
            b_tag = '<B-' + entity['entity'] + '> '
            e_tag = ' <E-' + entity['entity'] + '>'
            replacement = b_tag + entity['word'] + e_tag
            tagged_sent = tagged_sent[:start] + replacement + tagged_sent[end:]
            # Shift by the real length change: 'word' can be shorter or longer than
            # the span it replaces (e.g. hyphenated words joined without spaces).
            offset += len(replacement) - (end - start)
        tagged_sents.append(tagged_sent)
    tagged_text = " [SEP] ".join(tagged_sents)

    return tagged_sents, tagged_text
        

In [176]:
# Tag the example document and print the tagged sentences one per line.
tagged_sents, tagged_text = tag_sents(sents, entities)

for t in tagged_sents:
    print(t)

<B-ORG> Zest Airways , Inc. <E-ORG> operated as <B-ORG> AirAsia Zest <E-ORG> ( formerly <B-ORG> Asian Spirit and <E-ORG> <B-ORG> Zest Air <E-ORG> ) , was a low - cost airline based at the <B-LOC> Ninoy Aquino International Airport <E-LOC> in <B-LOC> Pasay City <E-LOC> , <B-LOC> Metro Manila <E-LOC> in the <B-LOC> Philippines <E-LOC> .
It operated scheduled domestic and international tourist services , mainly feeder services linking <B-LOC> Manila <E-LOC> and <B-LOC> Cebu <E-LOC> with 24 domestic destinations in support of the trunk route operations of other airlines .
In 2013 , the airline became an affiliate of <B-ORG> Philippines AirAsia <E-ORG> operating their brand separately .
Its main base was <B-LOC> Ninoy Aquino International Airport <E-LOC> , <B-LOC> Manila <E-LOC> .
The airline was founded as <B-ORG> Asian Spirit <E-ORG> , the first airline in the <B-LOC> Philippines <E-LOC> to be run as a cooperative .
On August 16 , 2013 , the <B-ORG> Civil Aviation Authority of the Philipp

In [150]:
# Count the space-separated pieces of the tagged document (tags and [SEP] included).
t = tagged_text.split(' ')
print(len(t))

250


In [151]:
result = [{'entity': 'B-ORG', 'score': 0.9959073, 'index': 1, 'word': 'Z', 'start': 0, 'end': 1}, {'entity': 'B-ORG', 'score': 0.997733, 'index': 2, 'word': '##est', 'start': 1, 'end': 4}, {'entity': 'I-ORG', 'score': 0.99700874, 'index': 3, 'word': 'Airways', 'start': 5, 'end': 12}, {'entity': 'I-ORG', 'score': 0.4994115, 'index': 4, 'word': ',', 'start': 13, 'end': 14}, {'entity': 'I-ORG', 'score': 0.8669601, 'index': 5, 'word': 'Inc', 'start': 15, 'end': 18}, {'entity': 'I-ORG', 'score': 0.90643686, 'index': 6, 'word': '.', 'start': 18, 'end': 19}, {'entity': 'B-ORG', 'score': 0.99389356, 'index': 9, 'word': 'Air', 'start': 32, 'end': 35}, {'entity': 'B-ORG', 'score': 0.94523096, 'index': 10, 'word': '##As', 'start': 35, 'end': 37}, {'entity': 'B-ORG', 'score': 0.9889908, 'index': 11, 'word': '##ia', 'start': 37, 'end': 39}, {'entity': 'I-ORG', 'score': 0.9769206, 'index': 12, 'word': 'Z', 'start': 40, 'end': 41}, {'entity': 'I-ORG', 'score': 0.9663055, 'index': 13, 'word': '##est', 'start': 41, 'end': 44}, {'entity': 'B-ORG', 'score': 0.9960812, 'index': 16, 'word': 'Asian', 'start': 56, 'end': 61}, {'entity': 'I-ORG', 'score': 0.99422354, 'index': 17, 'word': 'Spirit', 'start': 62, 'end': 68}, {'entity': 'I-ORG', 'score': 0.7765905, 'index': 18, 'word': 'and', 'start': 69, 'end': 72}, {'entity': 'B-ORG', 'score': 0.7527509, 'index': 19, 'word': 'Z', 'start': 73, 'end': 74}, {'entity': 'B-ORG', 'score': 0.92475957, 'index': 20, 'word': '##est', 'start': 74, 'end': 77}, {'entity': 'I-ORG', 'score': 0.99716705, 'index': 21, 'word': 'Air', 'start': 78, 'end': 81}, {'entity': 'B-LOC', 'score': 0.96416926, 'index': 33, 'word': 'Ni', 'start': 124, 'end': 126}, {'entity': 'B-LOC', 'score': 0.9514548, 'index': 34, 'word': '##noy', 'start': 126, 'end': 129}, {'entity': 'I-LOC', 'score': 0.98231596, 'index': 35, 'word': 'Aquino', 'start': 130, 'end': 136}, {'entity': 'I-LOC', 'score': 0.9787137, 'index': 36, 'word': 'International', 'start': 137, 'end': 150}, {'entity': 
'I-LOC', 'score': 0.94873, 'index': 37, 'word': 'Airport', 'start': 151, 'end': 158}, {'entity': 'B-LOC', 'score': 0.9940953, 'index': 39, 'word': 'Pa', 'start': 162, 'end': 164}, {'entity': 'B-LOC', 'score': 0.9870161, 'index': 40, 'word': '##say', 'start': 164, 'end': 167}, {'entity': 'I-LOC', 'score': 0.9920316, 'index': 41, 'word': 'City', 'start': 168, 'end': 172}, {'entity': 'B-LOC', 'score': 0.9921524, 'index': 43, 'word': 'Metro', 'start': 175, 'end': 180}, {'entity': 'I-LOC', 'score': 0.9947172, 'index': 44, 'word': 'Manila', 'start': 181, 'end': 187}, {'entity': 'B-LOC', 'score': 0.97528344, 'index': 47, 'word': 'Philippines', 'start': 195, 'end': 206}]
# Run the two post-processing steps on the hard-coded `result` list defined above.
merged = merge_result(result, model_name)
joined = combine_entities(merged)

# Show the combined entities one per line.
print('joined:')
for j in joined:
    print(j)


joined:
{'entity': 'ORG', 'score': 0.4994115, 'start': 0, 'end': 19, 'word': 'Zest Airways , Inc.'}
{'entity': 'ORG', 'score': 0.94523096, 'start': 32, 'end': 44, 'word': 'AirAsia Zest'}
{'entity': 'ORG', 'score': 0.7765905, 'start': 56, 'end': 72, 'word': 'Asian Spirit and'}
{'entity': 'ORG', 'score': 0.7527509, 'start': 73, 'end': 81, 'word': 'Zest Air'}
{'entity': 'LOC', 'score': 0.94873, 'start': 124, 'end': 158, 'word': 'Ninoy Aquino International Airport'}
{'entity': 'LOC', 'score': 0.9870161, 'start': 162, 'end': 172, 'word': 'Pasay City'}
{'entity': 'LOC', 'score': 0.9921524, 'start': 175, 'end': 187, 'word': 'Metro Manila'}
{'entity': 'LOC', 'score': 0.97528344, 'start': 195, 'end': 206, 'word': 'Philippines'}


In [141]:
# Print the untagged sentences first ...
for sent in sents:
    print(sent)

# ... then the tagged versions, for a visual comparison.
for sent in tagged_sents:
    print(sent)

Zest Airways , Inc. operated as AirAsia Zest ( formerly Asian Spirit and Zest Air ) , was a low - cost airline based at the Ninoy Aquino International Airport in Pasay City , Metro Manila in the Philippines .
It operated scheduled domestic and international tourist services , mainly feeder services linking Manila and Cebu with 24 domestic destinations in support of the trunk route operations of other airlines .
In 2013 , the airline became an affiliate of Philippines AirAsia operating their brand separately .
Its main base was Ninoy Aquino International Airport , Manila .
The airline was founded as Asian Spirit , the first airline in the Philippines to be run as a cooperative .
On August 16 , 2013 , the Civil Aviation Authority of the Philippines ( CAAP ) , the regulating body of the Government of the Republic of the Philippines for civil aviation , suspended Zest Air flights until further notice because of safety issues .
Less than a year after AirAsia and Zest Air 's strategic allian

In [164]:
# Number of space-separated pieces in the tagged document.
print(len(tagged_text.split(' ')))

# Tokenize the tagged document, padded or truncated to exactly 512 positions.
inputs = tokenizer(
    tagged_text,
    padding='max_length',
    truncation=True, 
    max_length=512,
    return_tensors='pt'
)

# Report the mask length and how many mask entries are 0.
print("Attention Mask Length:", len(inputs["attention_mask"][0]))
print("Number of Padding Tokens:", inputs["attention_mask"][0].tolist().count(0))


250
Attention Mask Length: 512
Number of Padding Tokens: 41


In [170]:
# Same measurement as for the tagged text, but on the untagged sentences joined by ' [SEP] '.
text = ' [SEP] '.join(sents)
print(text)
print(len(text.split(' ')))

# Tokenize the untagged document, padded or truncated to exactly 512 positions.
inputs = tokenizer(
    text,
    padding='max_length',
    truncation=True, 
    max_length=512,
    return_tensors='pt'
)

# Report the mask length and how many mask entries are 0.
print("Attention Mask Length:", len(inputs["attention_mask"][0]))
print("Number of Padding Tokens:", inputs["attention_mask"][0].tolist().count(0))

Zest Airways , Inc. operated as AirAsia Zest ( formerly Asian Spirit and Zest Air ) , was a low - cost airline based at the Ninoy Aquino International Airport in Pasay City , Metro Manila in the Philippines . [SEP] It operated scheduled domestic and international tourist services , mainly feeder services linking Manila and Cebu with 24 domestic destinations in support of the trunk route operations of other airlines . [SEP] In 2013 , the airline became an affiliate of Philippines AirAsia operating their brand separately . [SEP] Its main base was Ninoy Aquino International Airport , Manila . [SEP] The airline was founded as Asian Spirit , the first airline in the Philippines to be run as a cooperative . [SEP] On August 16 , 2013 , the Civil Aviation Authority of the Philippines ( CAAP ) , the regulating body of the Government of the Republic of the Philippines for civil aviation , suspended Zest Air flights until further notice because of safety issues . [SEP] Less than a year after AirA

In [209]:
import Levenshtein 

def are_similar(e1, e2, threshold=0.7):
    '''
    Decide whether two (word, entity type) items look like the same entity.

    Args:
        e1, e2: Indexable items whose element 0 is the entity text and whose
            element 1 is the entity type.
        threshold (float): Levenshtein ratio that must be exceeded (strictly).

    Returns:
        bool: True when the ratio of the two texts is above threshold and the
            two types are equal, otherwise False.
    '''
    close_enough = Levenshtein.ratio(e1[0], e2[0]) > threshold
    return bool(close_enough and e1[1] == e2[1])

def make_pairs(entities):
    '''
    Build all ordered pairs of distinct-looking entities found in a document.

    The per-sentence entity lists are flattened to (word, entity type) items.
    Every ordered combination of two different positions is considered; a pair
    is kept when are_similar() rejects it and printed (not kept) otherwise.
    Duplicate pairs are returned only once.

    Args:
        entities (list): List of lists of dicts with 'word' and 'entity' keys.

    Returns:
        list: Unique ((word, type), (word, type)) tuples in first-seen order, so
            the result is the same on every run (a plain set would be ordered
            by randomized string hashes).
    '''
    flattened = [
        (item['word'], item['entity'])
        for sentence_entities in entities
        for item in sentence_entities
    ]

    # A dict is used as an insertion-ordered set.
    pairs = {}
    for i, e1 in enumerate(flattened):
        for j, e2 in enumerate(flattened):
            if i == j:
                continue
            pair = (e1, e2)
            if not are_similar(e1, e2):
                pairs[pair] = None
            else:
                # Similar items are treated as mentions of one entity; shown for inspection.
                print(pair)
    return list(pairs)

# Build candidate entity pairs for the example document (similar pairs are printed by make_pairs).
pairs = make_pairs(entities)

# List the pairs that were kept.
print('pairs:')
for p in pairs:
    print(p)

(('AirAsia Zest', 'ORG'), ('AirAsia', 'ORG'))
(('AirAsia Zest', 'ORG'), ('AirAsia Zest', 'ORG'))
(('Asian Spirit and', 'ORG'), ('Asian Spirit', 'ORG'))
(('Zest Air', 'ORG'), ('Zest Air', 'ORG'))
(('Zest Air', 'ORG'), ('Zest Air', 'ORG'))
(('Ninoy Aquino International Airport', 'LOC'), ('Ninoy Aquino International Airport', 'LOC'))
(('Philippines', 'LOC'), ('Philippines', 'LOC'))
(('Manila', 'LOC'), ('Manila', 'LOC'))
(('Ninoy Aquino International Airport', 'LOC'), ('Ninoy Aquino International Airport', 'LOC'))
(('Manila', 'LOC'), ('Manila', 'LOC'))
(('Asian Spirit', 'ORG'), ('Asian Spirit and', 'ORG'))
(('Philippines', 'LOC'), ('Philippines', 'LOC'))
(('Zest Air', 'ORG'), ('Zest Air', 'ORG'))
(('Zest Air', 'ORG'), ('Zest Air', 'ORG'))
(('AirAsia', 'ORG'), ('AirAsia Zest', 'ORG'))
(('AirAsia', 'ORG'), ('AirAsia Zest', 'ORG'))
(('Zest Air', 'ORG'), ('Zest Air', 'ORG'))
(('Zest Air', 'ORG'), ('Zest Air', 'ORG'))
(('AirAsia Zest', 'ORG'), ('AirAsia Zest', 'ORG'))
(('AirAsia Zest', 'ORG'), 

In [201]:
# Flatten the per-sentence entity lists to [word, entity type] items.
entities_flattened = [[item['word'], item['entity']] for entity in entities for item in entity]

for e in entities_flattened:
    print(e)

print('uniwue')

# Exact duplicates removed via a set of tuples; a set does not keep the original order.
uniques = list(set(tuple(e) for e in entities_flattened))
for u in uniques:
    print(u)

['Zest Airways , Inc.', 'ORG']
['AirAsia Zest', 'ORG']
['Asian Spirit and', 'ORG']
['Zest Air', 'ORG']
['Ninoy Aquino International Airport', 'LOC']
['Pasay City', 'LOC']
['Metro Manila', 'LOC']
['Philippines', 'LOC']
['Manila', 'LOC']
['Cebu', 'LOC']
['Philippines AirAsia', 'ORG']
['Ninoy Aquino International Airport', 'LOC']
['Manila', 'LOC']
['Asian Spirit', 'ORG']
['Philippines', 'LOC']
['Civil Aviation Authority of the Philippines', 'ORG']
['CAAP', 'ORG']
['Government of the Republic of the Philippines', 'ORG']
['Zest Air', 'ORG']
['AirAsia', 'ORG']
['Zest Air', 'ORG']
['AirAsia Zest', 'ORG']
['AirAsia Philippines', 'ORG']
uniwue
('AirAsia Zest', 'ORG')
('AirAsia Philippines', 'ORG')
('Philippines AirAsia', 'ORG')
('Manila', 'LOC')
('AirAsia', 'ORG')
('Asian Spirit', 'ORG')
('Metro Manila', 'LOC')
('Philippines', 'LOC')
('Government of the Republic of the Philippines', 'ORG')
('Pasay City', 'LOC')
('Asian Spirit and', 'ORG')
('Zest Airways , Inc.', 'ORG')
('Civil Aviation Authorit