In [17]:
import glob
import xml.etree.ElementTree as ET
import re

In [18]:
class Argument(object):
    def __init__(self, id_, start, end, role, text):
        self.id_ = id_
        self.start = start
        self.end = end
        self.role = role
        self.text = text

    def to_string(self):
        return "Argument: {id_ = " + self.id_ + ", role = " + self.role + ", text = " + self.text + "}"
    
class Entity(object):
    def __init__(self, phrase_type, end, text, entity_type, start, id_):
        self.phrase_type = phrase_type
        self.end = end
        self.text = text
        self.entity_type = entity_type
        self.start = start
        self.id_ = id_

    def to_string(self):
        return "Entity: {id_ = " + self.id_ + ", entity_type = " + self.entity_type + ", text = " + self.text + ", phrase_type=" + self.phrase_type +"}"

class Sentence(object):
    def __init__(self, text, start, end):
        self.text = text
        self.start = start
        self.end = end

    def to_string(self):
        return "Sentence: {text = " + self.text + ", start = " + self.start + ", end = " + self.end + "}"
    
    def __str__(self):
        return str(self.__dict__)

    def __eq__(self, other): 
        return self.__dict__ == other.__dict__
    
    def __hash__(self):
        return hash(self)

class Event(object):
    def __init__(self, event_id, mention_id, type_, subtype, modality, polarity, genericity, tense, extent, extent_start, extent_end, scope, trig_text, trig_start, trig_end, arguments, entities):
        self.event_id = event_id
        self.mention_id = mention_id
        self.type_ = type_
        self.subtype = subtype
        self.modality = modality
        self.polarity = polarity
        self.genericity = genericity
        self.tense = tense
        self.extent = extent
        self.extent_start = extent_start
        self.extent_end = extent_end
        self.scope = scope
        self.trig_text = trig_text
        self.trig_start = trig_start
        self.trig_end = trig_end
        self.arguments = arguments
        self.entities = entities


    def to_string(self):
        return "Event: { event_id = " + self.event_id + "mention_id = " + self.mention_id + ", type = " + self.type_ + ", subtype = " +self.subtype + ", modality = " \
               + self.modality + ", polarity = " + self.polarity + ", genericity= " + self.genericity + ", tense = " +\
               self.tense + ", extent = " +self.extent + ", scope = " + self.scope  + ", trigger = " + self.trigger


In [146]:
def extract_entity_info(entity, scope_start, scope_end, extent, words_start, words_end, extent_start):
    entity_id = entity.attrib["ID"]
    phrase_type = entity.attrib["TYPE"] + ":" + entity.attrib["SUBTYPE"]
    entity_class = entity.attrib["CLASS"]

    entities = []
    for mention in entity.iter('entity_mention'):
        entity_type = mention.attrib["LDCTYPE"]
        for child in mention:
            if child.tag == "extent":
                for chil2 in child:
                    text = chil2.text
                    start = int(chil2.attrib["START"])
                    end = int(chil2.attrib["END"])
                    
                    list_end = list(words_end.keys())
                    list_start = list(words_start.keys())
            
                    a_end = words_end[findClosest(list_end, len(list_end), end)]
                    a_start = words_start[findClosest(list_start, len(list_start), start)]
                    
        if scope_start <= start and  scope_end >= end:              
            ent = Entity(phrase_type, a_end, text, entity_type, a_start, entity_id)
            entities.append(ent)
        
    return entities

In [147]:
class Word(object):
    def __init__(self, start, end):
        #self.str_ = str_
        self.start = start
        self.end = end

In [148]:
def find_positions(sent):
    parse, = dep_parser.raw_parse(sent)
    i = 0
    words_start = {}
    words_end = {}
    accu_sum = 0
    i = 1
    for part in parse.to_conll(4).split("\n"):
        if part != "":
            parts = part.split("\t")
            word = parts[0]
            word_start = accu_sum
            word_end = accu_sum + len(word)
            accu_sum += len(word) + 1
            words_start.update({word_start: i})
            words_end.update({word_end: i})
            i+=1
    return words_start, words_end

In [149]:
def extract_from_xml(root_path, language, domain):
    events = {}
    #print(root_path + language + "/" + domain + "/adj/*.apf.xml")
    files_processed = 0
    for file_name in sorted(glob.glob(root_path + language + "/" + domain + "/adj/*.apf.xml")):        
        # Get the event + argument annotation
        print("file_name=", file_name)
        files_processed += 1
        tree = ET.parse(file_name, ET.XMLParser(encoding='utf-8'))
        root = tree.getroot()
        
        for event in root.iter('event'):
            sent, ev = extract_event_info(root, event)
            if sent.text not in events:
                events.update({sent.text: [ev]}) 
            else:
                ev_list = events[sent.text]
                ev_list.append(ev)
                events.update({sent.text: ev_list})
                
        
    return events, files_processed

In [150]:
import math
def findClosest(arr, n, target): 
  
    # Corner cases 
    if (target <= arr[0]): 
        return arr[0] 
    if (target >= arr[n - 1]): 
        return arr[n - 1] 
  
    # Doing binary search 
    i = 0; j = n; mid = 0
    while (i < j):  
        mid = math.floor((i + j) / 2)
  
        if (arr[mid] == target): 
            return arr[mid] 
  
        # If target is less than array  
        # element, then search in left 
        if (target < arr[mid]) : 
  
            # If target is greater than previous 
            # to mid, return closest of two 
            if (mid > 0 and target > arr[mid - 1]): 
                return getClosest(arr[mid - 1], arr[mid], target) 
  
            # Repeat for left half  
            j = mid 
          
        # If target is greater than mid 
        else : 
            if (mid < n - 1 and target < arr[mid + 1]): 
                return getClosest(arr[mid], arr[mid + 1], target) 
                  
            # update i 
            i = mid + 1
          
    # Only single element left after search 
    return arr[mid] 

In [151]:
def getClosest(val1, val2, target): 
  
    if (target - val1 >= val2 - target): 
        return val2 
    else: 
        return val1 
  

In [152]:
def extract_event_info(root, event):
    event_id = event.attrib["ID"]
    event_type = event.attrib["TYPE"]
    subtype = event.attrib["SUBTYPE"]
    modality = event.attrib["MODALITY"]
    polarity = event.attrib["POLARITY"]
    genericity = event.attrib["GENERICITY"]
    tense = event.attrib["TENSE"]

    ## Looking at event mentions
    for mention in event.iter('event_mention'):
        mention_id = mention.attrib["ID"]
        for child in mention:
            if child.tag == "extent":
                for chil2 in child:
                    extent = chil2.text
                    extent_start = int(chil2.attrib["START"])
                    extent_end = int(chil2.attrib["END"]) 
                    words_start, words_end = find_positions(extent)
                
            elif child.tag == "ldc_scope":
                for chil2 in child:
                    scope = chil2.text
                    scope_start = int(chil2.attrib["START"])
                    scope_end = int(chil2.attrib["END"])
                sent = Sentence(scope, scope_start, scope_end)
                
            elif child.tag == "anchor":
                for chil2 in child:
                    trig_text = chil2.text
                    trig_start = int(chil2.attrib["START"]) - extent_start
                    trig_end = int(chil2.attrib["END"]) - extent_start
                    
                    list_end = list(words_end.keys())
                    list_start = list(words_start.keys())
                    
                    t_end = words_end[findClosest(list_end, len(list_end), trig_end)]
                    t_start = words_start[findClosest(list_start, len(list_start), trig_start)]
        

        arguments = []           
        for argument in mention.iter('event_mention_argument'):
            arg_id = argument.attrib["REFID"]
            role = argument.attrib["ROLE"]
            for child in argument:
                for chil2 in child:
                    arg_text = chil2.text
                    arg_start = int(chil2.attrib["START"]) - extent_start
                    arg_end = int(chil2.attrib["END"]) - extent_start
                    
                    list_end = list(words_end.keys())
                    list_start = list(words_start.keys())
            
                    a_end = words_end[findClosest(list_end, len(list_end), arg_end)]
                    a_start = words_start[findClosest(list_start, len(list_start), arg_start)]
                        
            arg = Argument(arg_id, a_start, a_end, role, arg_text)

            arguments.append(arg)
       
        ## Looking at entity mentions with that same event
        entities = []
        for entity in root.iter('entity'):
            entities.extend(extract_entity_info(entity, scope_start, scope_end, extent, words_start, words_end, extent_start))

    ev = Event(event_id, mention_id, event_type, subtype, modality, polarity, genericity, tense, extent, extent_start, extent_end, scope, trig_text, t_start, t_end, arguments, entities)

    return sent, ev

In [153]:
#data_path = "/Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/ACE/Raw/LDC2006T06/data/English/bc/adj/"
root_path = "/Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/"

In [154]:
languages = [file_.split("/")[-1] for file_ in glob.glob(root_path + "*") if "Icon\r" not in file_]

In [155]:
events_list_lang = {}

def merge_two_dicts(x, y):
    z = x.copy()   # start with x's keys and values
    z.update(y)    # modifies z with y's keys and values & returns None
    return z

for language in languages:
    files_num = 0
    domains = [file_.split("/")[-1] for file_ in glob.glob(root_path + language + "/*" ) if "Icon\r" not in file_]
    events_lang = {}
    for domain in domains:
        events, files_processed = extract_from_xml(root_path, language, domain)
        files_num += files_processed
        
        events_lang = merge_two_dicts(events_lang, events)
    
    print("Number of files processed for language= ", language, " is= ", files_num)
    
    events_list_lang.update({language: events_lang})

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/un/adj/Austin-Grad-Community_20050212.2454.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/un/adj/Integritas-Group-Community-Forum_20050110.0557.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/un/adj/alt.atheism_20041104.2428.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/un/adj/alt.collecting.autographs_20050224.2438.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/un/adj/alt.corel_20041228.0503.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/un

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bc/adj/CNN_CF_20030305.1900.02.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bc/adj/CNN_CF_20030305.1900.06-1.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bc/adj/CNN_CF_20030305.1900.06-2.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bc/adj/CNN_IP_20030328.1600.07.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bc/adj/CNN_IP_20030329.1600.00-2.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bc/adj/CNN_IP_20030329.1600.00-3.apf.xml
file_nam

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/cts/adj/fsh_29226.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/cts/adj/fsh_29272.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/cts/adj/fsh_29302.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/cts/adj/fsh_29303.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/cts/adj/fsh_29326.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/cts/adj/fsh_29336.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/A

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/BACONSREBELLION_20050127.1017.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/BACONSREBELLION_20050204.1326.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/BACONSREBELLION_20050205.1919.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/BACONSREBELLION_20050206.1345.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/BACONSREBELLION_20050209.0721.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/BACONSREBELLION_20

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/MARKETVIEW_20050212.1717.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/MARKETVIEW_20050214.2115.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/MARKETVIEW_20050215.1858.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/MARKETVIEW_20050216.2120.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/MARKETVIEW_20050217.2115.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/wl/adj/MARKETVIEW_20050222.0729.apf.xml
file_name=

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNNHL_ENG_20030616_230155.7.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNNHL_ENG_20030618_230303.36.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNNHL_ENG_20030618_230303.6.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNNHL_ENG_20030624_133331.33.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNNHL_ENG_20030624_230338.34.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNNHL_ENG_20030625_230351

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030418_083040.11.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030418_130831.5.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030418_163834.14.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030421_090007.11.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030421_120508.13.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030421_120508.17.apf.xml

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030516_090022.7.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030516_123543.8.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030524_143511.4.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030525_143522.8.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030525_160525.13.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030526_133535.4.apf.xml
fil

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030617_173115.22.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030617_193116.10.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030618_065839.11.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030618_150128.5.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030618_150128.6.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/bn/adj/CNN_ENG_20030619_115954.10.apf.xml


file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/AFP_ENG_20030617.0846.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/AFP_ENG_20030630.0271.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/APW_ENG_20030304.0555.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/APW_ENG_20030306.0191.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/APW_ENG_20030308.0314.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/APW_ENG_20030310.0719.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/XIN_ENG_20030610.0299.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/XIN_ENG_20030616.0274.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/English/nw/adj/XIN_ENG_20030624.0085.apf.xml
Number of files processed for language=  English  is=  535
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/DAVYZW_20041223.1020.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/DAVYZW_20041227.1120.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/D

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/LIUYIFENG_20050123.1752.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/LIUYIFENG_20050123.1754.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/LIUYIFENG_20050124.1721.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/LIUYIFENG_20050124.1722.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/LIUYIFENG_20050124.1812.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/wl/adj/LIUYIFENG_20050124.1819.apf.xml
file_name= /User

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CBS20001021.1000.0347.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CBS20001021.1000.0734.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CBS20001023.1000.1067.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CBS20001026.1000.0053.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CBS20001027.1000.0518.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CBS20001030.1000.0617.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CNR20001001.1700.0426.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CNR20001002.1700.0638.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CNR20001003.1700.1240.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CNR20001004.1700.0727.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CNR20001005.1700.1087.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CNR20001008.1700.0510.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001017.1800.0697.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001018.0900.0879.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001019.1300.0508.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001019.1300.0638.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001020.1800.0678.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001020.1800.0740.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001121.1300.0621.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001121.1300.0689.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001122.1300.0271.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001123.1300.0100.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001124.1300.0705.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTS20001125.1300.0727.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTV20001106.1330.1457.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTV20001110.1330.0231.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTV20001110.1330.1507.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTV20001114.1330.0505.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTV20001114.1330.1509.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/CTV20001114.1330.1559.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/VOM20001028.1800.3128.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/VOM20001115.0700.0140.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/VOM20001115.0700.1089.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/VOM20001118.0700.1605.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/VOM20001123.0700.0261.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/bn/adj/VOM20001124.0700.0099.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001008.1400.0097.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001009.0200.0005.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001009.0800.0048.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001009.0800.0058.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001010.1400.0104.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001012.0800.0081.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001112.0800.0040.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001113.0800.0051.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001114.0200.0012.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001114.0200.0017.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001114.0200.0027.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001114.0800.0046.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001210.0800.0043.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001212.0200.0015.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001212.1400.0120.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001213.0800.0063.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001214.0200.0014.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/XIN20001215.0200.0008.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/ZBN20001003.0400.0013.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/ZBN20001007.1300.0047.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/ZBN20001008.0400.0004.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/ZBN20001011.0400.0018.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/ZBN20001017.0400.0003.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Chinese/nw/adj/ZBN20001020.1300.0060.apf.xml
file_name= /Users/d22admin/U

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/wl/adj/AZZAMOH_20041204.1015.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/wl/adj/AZZAMOH_20041220.0328.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/wl/adj/AZZAMOH_20041227.0120.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/wl/adj/BANYADAM_20050225.0158.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/wl/adj/DIGRESSING_20041101.1921.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/wl/adj/DIGRESSING_20041107.0106.apf.xml
file_name= /Users/d22admin/

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/NTV20001004.1530.0868.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/NTV20001006.1530.0102.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/NTV20001006.1530.0164.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/NTV20001009.1530.0451.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/NTV20001009.1530.1128.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/NTV20001009.1530.1277.apf.xml
file_name= /Users/d22admin/USCGDri

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001001.1300.3153.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001001.1300.3322.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001002.1300.0227.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001002.1300.0442.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001002.1300.0631.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001002.1300.1422.apf.xml
file_name= /Users/d22admin/USCGDri

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001130.1100.0593.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001130.1100.1029.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001205.1100.0950.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001207.1100.1838.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001209.1100.0312.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/bn/adj/VAR20001210.1100.0069.apf.xml
file_name= /Users/d22admin/USCGDri

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001107.1000.0042.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001107.1400.0087.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001108.1000.0093.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001108.1800.0195.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001109.1000.0026.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001110.1400.0090.apf.xml
file_name= /Users/d22admin/USCGDri

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001208.1000.0032.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001208.1000.0038.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001208.1000.0046.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001208.1000.0059.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001208.1000.0065.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/AFA20001208.1400.0073.apf.xml
file_name= /Users/d22admin/USCGDri

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001014.1300.0100.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001014.1300.0106.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001014.1300.0111.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001014.1300.0116.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001015.0100.0005.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001015.0100.0018.apf.xml
file_name= /Users/d22admin/USCGDri

file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001125.0700.0024.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001128.1300.0081.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001201.1300.0071.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001201.1900.0126.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001209.0700.0031.apf.xml
file_name= /Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/Raw/LDC2006T06/data/Arabic/nw/adj/ALH20001210.1300.0101.apf.xml
file_name= /Users/d22admin/USCGDri

## Stanford CoreNLP Preprocessing:
Generating the needed attributes:

- words, lemmas, pos-tags
- stanford-colcc

In [6]:
from nltk.parse.corenlp import CoreNLPDependencyParser, CoreNLPParser
dep_parser = CoreNLPDependencyParser(url='http://localhost:9001')
#parse, = dep_parser.raw_parse('The quick brown fox jumps over the lazy dog.')
sent = "No, they don\'t the 'suspicious evidence' is simply that\nArafat did not look particularly near death when he was\nremoved from his HQ"
parse, = dep_parser.raw_parse(sent)

In [4]:
print(parse)

defaultdict(<function DependencyGraph.__init__.<locals>.<lambda> at 0x1a1ef88510>,
            {0: {'address': 0,
                 'ctag': 'TOP',
                 'deps': defaultdict(<class 'list'>, {'ROOT': [4]}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': 'TOP',
                 'word': None},
             1: {'address': 1,
                 'ctag': 'DT',
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': '_',
                 'head': 4,
                 'lemma': 'no',
                 'rel': 'discourse',
                 'tag': 'DT',
                 'word': 'No'},
             2: {'address': 2,
                 'ctag': ',',
                 'deps': defaultdict(<class 'list'>, {}),
                 'feats': '_',
                 'head': 4,
                 'lemma': ',',
                 'rel': 'punct',
                 'tag': ',',
                

In [4]:
print(parse.tokenBeginIndex)

AttributeError: 'DependencyGraph' object has no attribute 'tokenBeginIndex'

In [30]:
def find_stanford_colcc(sent):
    parse, = dep_parser.raw_parse(sent)
    i = 0
    pos_tags = []
    words = []
    triples = []
    for part in parse.to_conll(4).split("\n"):
        if part != "":
            parts = part.split("\t")
            words.append(parts[0])
            pos_tags.append(parts[1])
            rel = parts[3].lower()
            gov = int(parts[2])-1
            dep = i
            i += 1

            triples.append(rel+"/dep="+str(dep)+"/gov="+str(gov))
    return triples, words, pos_tags

In [46]:
find_positions(sent)

{1: ['A', 0, 1],
 2: ['statement', 2, 11],
 3: ['issued', 12, 18],
 4: ['at', 19, 21],
 5: ['the', 22, 25],
 6: ['two-day', 26, 33],
 7: ['meeting', 34, 41],
 8: ['of', 42, 44],
 9: ['the', 45, 48],
 10: ['EU', 49, 51],
 11: ['foreign', 52, 59],
 12: ['ministers', 60, 69],
 13: ['called', 70, 76],
 14: ['on', 77, 79],
 15: ['Iran', 80, 84],
 16: ['to', 85, 87],
 17: ['conclude', 88, 96],
 18: ['and', 97, 100],
 19: ['implement', 101, 110],
 20: ['urgently', 111, 119],
 21: ['and', 120, 123],
 22: ['unconditionally', 124, 139],
 23: ['an', 140, 142],
 24: ['additional', 143, 153],
 25: ['protocol', 154, 162],
 26: ['to', 163, 165],
 27: ['the', 166, 169],
 28: ['Nuclear', 170, 177],
 29: ['Non', 178, 181],
 30: ['-', 182, 183],
 31: ['Proliferation', 184, 197],
 32: ['Treaty', 198, 204],
 33: ['allowing', 205, 213],
 34: ['for', 214, 217],
 35: ['short-notice', 218, 230],
 36: ['inspections', 231, 242],
 37: ['of', 243, 245],
 38: ['suspected', 246, 255],
 39: ['nuclear', 256, 263],
 40

In [41]:
find_stanford_colcc(sent)

(['det/dep=0/gov=1',
  'root/dep=1/gov=-1',
  'acl/dep=2/gov=1',
  'case/dep=3/gov=6',
  'det/dep=4/gov=6',
  'amod/dep=5/gov=6',
  'nmod/dep=6/gov=2',
  'case/dep=7/gov=11',
  'det/dep=8/gov=11',
  'compound/dep=9/gov=11',
  'amod/dep=10/gov=11',
  'nmod/dep=11/gov=6',
  'acl/dep=12/gov=11',
  'case/dep=13/gov=14',
  'nmod/dep=14/gov=12',
  'mark/dep=15/gov=16',
  'xcomp/dep=16/gov=12',
  'cc/dep=17/gov=16',
  'conj/dep=18/gov=16',
  'advmod/dep=19/gov=24',
  'cc/dep=20/gov=19',
  'conj/dep=21/gov=19',
  'det/dep=22/gov=24',
  'amod/dep=23/gov=24',
  'dobj/dep=24/gov=16',
  'case/dep=25/gov=28',
  'det/dep=26/gov=28',
  'amod/dep=27/gov=28',
  'nmod/dep=28/gov=24',
  'punct/dep=29/gov=24',
  'compound/dep=30/gov=31',
  'dep/dep=31/gov=24',
  'acl/dep=32/gov=31',
  'case/dep=33/gov=35',
  'amod/dep=34/gov=35',
  'nmod/dep=35/gov=32',
  'case/dep=36/gov=39',
  'amod/dep=37/gov=39',
  'amod/dep=38/gov=39',
  'nmod/dep=39/gov=35',
  'case/dep=40/gov=42',
  'det/dep=41/gov=42',
  'nmod/dep

In [31]:
print(parse.to_conll(4))

The	DT	2	det
skeleton	NN	9	nsubjpass
of	IN	6	case
a	DT	6	det
second	JJ	6	amod
baby	NN	2	nmod
has	VBZ	9	aux
been	VBN	9	auxpass
found	VBN	33	ccomp
at	IN	14	case
a	DT	14	det
rural	JJ	14	amod
Wisconsin	NNP	14	compound
home	NN	9	nmod
where	WRB	23	advmod
a	DT	18	det
22-year-old	JJ	18	amod
woman	NN	21	nmod:poss
's	POS	18	case
dead	JJ	21	amod
infant	NN	23	nsubjpass
was	VBD	23	auxpass
discovered	VBN	14	acl:relcl
in	IN	28	case
a	DT	28	det
blue	JJ	28	amod
plastic	NN	28	compound
container	NN	23	nmod
June	NNP	23	nmod:tmod
8	CD	29	nummod
,	,	33	punct
officials	NNS	33	nsubj
said	VBD	0	ROOT
Monday	NNP	33	nmod:tmod
.	.	33	punct



## Save to json:

In [156]:
import json
from tqdm import tqdm

data_json = []
sent_json = []
for sent in tqdm(events_list_lang["English"]):
    sent_json.append(sent)
    data_sub = {}
    data_sub["golden-event-mentions"] = []
    entities_unique = {}
    for event in events_list_lang["English"][sent]:
        event_info = {}
        #(self, event_id, mention_id, type_, subtype, modality, polarity, genericity, 
        # tense, extent, scope, trig_text, trig_start, trig_end, arguments):
 
        event_info["trigger"] = {"start": event.trig_start, "end": event.trig_end, "text": event.trig_text}
        event_info["arguments"] = []
        for arg in event.arguments:
            arg_info = {"start": arg.start, "role": arg.role, "end": arg.end, "text": arg.text}
            event_info["arguments"].append(arg_info)
            
        event_info["id"] = event.event_id
        event_info["event_type"] = event.type_
        data_sub["golden-event-mentions"].append(event_info) 
        
        # Loading entities for that event and adding it to the list of entities
        for entity in event.entities:
            entities_unique.update({entity.id_:entity})
            
    data_sub["golden-entity-mentions"] = []
    for entity_id in entities_unique.keys():
        entity_info = {"phrase-type": entities_unique[entity_id].phrase_type, "end": entities_unique[entity_id].end, "text": entities_unique[entity_id].text, "entity-type": entities_unique[entity_id].entity_type, "start": entities_unique[entity_id].start, "id": entity_id}
        data_sub["golden-entity-mentions"].append(entity_info)
       
    triples, words, pos_tags = find_stanford_colcc(sent)
    data_json.append({"stanford-colcc": triples, "golden-entity-mentions": data_sub["golden-entity-mentions"], "words": words, "pos-tags": pos_tags, "golden-event-mentions": data_sub["golden-event-mentions"]})

100%|██████████| 2590/2590 [00:48<00:00, 32.50it/s]


In [141]:
len(data_json)

2590

In [157]:
data_json[0]

{'stanford-colcc': ['nsubj/dep=0/gov=5',
  'aux/dep=1/gov=5',
  'cop/dep=2/gov=5',
  'det/dep=3/gov=5',
  'compound/dep=4/gov=5',
  'root/dep=5/gov=-1',
  'case/dep=6/gov=9',
  'det/dep=7/gov=9',
  'compound/dep=8/gov=9',
  'nmod/dep=9/gov=5',
  'punct/dep=10/gov=9',
  'amod/dep=11/gov=9',
  'mark/dep=12/gov=13',
  'advcl/dep=13/gov=11',
  'dobj/dep=14/gov=13',
  'case/dep=15/gov=18',
  'det/dep=16/gov=18',
  'compound/dep=17/gov=18',
  'nmod/dep=18/gov=13'],
 'golden-entity-mentions': [{'phrase-type': 'GPE:Population-Center',
   'end': 11,
   'text': 'Houston',
   'entity-type': 'NAMPRE',
   'start': 11,
   'id': 'Austin-Grad-Community_20050212.2454-E1'},
  {'phrase-type': 'ORG:Educational',
   'end': 11,
   'text': 'the Houston Center',
   'entity-type': 'NOM',
   'start': 11,
   'id': 'Austin-Grad-Community_20050212.2454-E2'},
  {'phrase-type': 'PER:Individual',
   'end': 11,
   'text': 'the Registration\nManager for the Houston Center',
   'entity-type': 'NOM',
   'start': 11,
   '

### Train/Dev/Test Split:

In [35]:
import random
random.shuffle(data_json)

In [158]:
with open('/Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/ace-05-splits/english/train.json', 'w') as outfile:
    json.dump(data_json[:2000], outfile)

In [159]:
with open('/Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/ace-05-splits/english/dev.json', 'w') as outfile:
    json.dump(data_json[2000:2250], outfile)

In [160]:
with open('/Users/d22admin/USCGDrive/Spring19/ISI/EventExtraction/3Datasets/EventsExtraction/ACE/ace-05-splits/english/test.json', 'w') as outfile:
    json.dump(data_json[2250:], outfile)

In [None]:
phrase_type, end, text, entity_type, start, id_

In [39]:
ev_example[1].trig_text

NameError: name 'ev_example' is not defined

In [24]:
events_list_lang["English"][6].scope

'It will have to be snail mail), give her a call at the center, or\nsend her a fax and welcome her to our Center'