In [None]:
import json
import os
import math
import re
import numpy as np

# Long location-type names (looked up by parse_json_by_type via lm["type"])
# mapped to the short tags the type-based evaluation compares against.
types = {
    "Island": "ISL",
    "State": "STAT",
    "Continent": "CONT",
    "City/town": "CITY",
    "Country": "CTRY",
    "County": "CNTY",
    "Neighborhood": "NBHD",
    "Road/street": "ST",
    "District": "DIST",
    "Other locations": "OTHR",
    "Natural Point-of-Interest": "NPOI",
    "Human-made Point-of-Interest": "HPOI",
}

# Event identifiers; the evaluation cells splice these into file paths.
# NOTE(review): the "en" prefix presumably means English-language events - confirm.
en_events = [
    "california_wildfires_2018",
    "canada_wildfires_2016",
    "cyclone_idai_2019",
    "ecuador_earthquake_2016",
    "greece_wildfires_2018",
    "hurricane_dorian_2019",
    "hurricane_florence_2018",
    "hurricane_harvey_2017",
    "hurricane_irma_2017",
    "hurricane_maria_2017",
    "hurricane_matthew_2016",
    "italy_earthquake_aug_2016",
    "kaikoura_earthquake_2016",
    "kerala_floods_2018",
    "maryland_floods_2018",
    "midwestern_us_floods_2019",
    "pakistan_earthquake_2019",
    "puebla_mexico_earthquake_2017",
    "srilanka_floods_2017",
]

# Second list of event identifiers.
# NOTE(review): presumably the Arabic-language counterpart of en_events - confirm.
ar_events = [
    "beirut_explosion_2020",
    "cairo_bombing_2019",
    "covid_2019",
    "dragon_storms_2020",
    "hafr_albatin_floods_2019",
    "jordan_floods_2018",
    "kuwait_floods_2018",
]


In [None]:
# reads the BILOU formatted data

def read_bilou(path):
    """Read a token-per-line label file into parallel token and label lists.

    Each non-blank line holds whitespace-separated fields: the first is the
    token and the second its label. A blank line ends the current sentence.

    Args:
        path: path of the UTF-8 encoded file to read.

    Returns:
        A tuple ``(tokens, labels)``; ``tokens[i]`` and ``labels[i]`` are the
        equally long token and label lists of sentence ``i``. Every blank line
        emits a sentence, so consecutive blank lines produce empty sentences
        (this keeps gold and prediction files index-aligned).

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    tokens = []
    labels = []
    sent_tokens = []
    sent_labels = []

    # Context manager closes the file deterministically instead of leaking the handle.
    with open(path, encoding='utf-8') as handle:
        lines = handle.read().splitlines()

    for line in lines:
        # Whitespace-only lines count as separators too; previously they
        # crashed on splits[0] because split() returned an empty list.
        if line.strip() == '':
            tokens.append(sent_tokens)
            labels.append(sent_labels)
            sent_tokens = []
            sent_labels = []
            continue
        fields = line.split()
        sent_tokens.append(fields[0])
        sent_labels.append(fields[1])

    # Flush the last sentence when the file does not end with a blank line.
    if sent_tokens and sent_labels:
        tokens.append(sent_tokens)
        labels.append(sent_labels)

    return tokens, labels

In [None]:
def remove_punct(text):
    """Return the ``\\w+`` runs of ``text`` joined by single spaces.

    Everything that is not a word character (punctuation, symbols,
    whitespace) acts as a separator and is dropped.
    """
    words = re.findall(r'(\w+)', text)
    return ' '.join(words)

In [None]:
def parse_json(path):
    """Read a JSON-lines file and return the normalized location mentions per line.

    Every line must be a JSON object with a ``"location_mentions"`` list whose
    items carry a ``"text"`` field. Each mention text is normalized by
    stripping non-word characters, lower-casing and removing all spaces.

    Args:
        path: path of the UTF-8 encoded JSON-lines file.

    Returns:
        A list with one entry per line; each entry is the list of normalized
        mention strings of that line.
    """
    # Context manager closes the file deterministically instead of leaking the handle.
    with open(path, encoding='utf-8') as handle:
        lines = handle.read().splitlines()

    locs = []
    for line in lines:
        tweet = json.loads(line)
        locs.append([remove_punct(lm["text"]).lower().replace(" ", "")
                     for lm in tweet["location_mentions"]])
    return locs

def parse_json_by_type(path, LOC):
    """Read a JSON-lines file and return ``(mention, tag)`` pairs per line.

    Mention texts are normalized as in ``parse_json`` (non-word characters
    stripped, lower-cased, spaces removed).

    Args:
        path: path of the UTF-8 encoded JSON-lines file.
        LOC: when truthy every mention is tagged with the single tag ``"LOC"``;
            otherwise the mention's ``"type"`` field is translated through the
            module-level ``types`` mapping.

    Returns:
        A list with one entry per line; each entry is a list of
        ``(normalized_text, tag)`` tuples.

    Raises:
        KeyError: if ``LOC`` is falsy and a mention's type is not in ``types``.
    """
    # Context manager closes the file deterministically instead of leaking the handle.
    with open(path, encoding='utf-8') as handle:
        lines = handle.read().splitlines()

    locs = []
    for line in lines:
        tweet = json.loads(line)
        if LOC:
            locs.append([(remove_punct(lm["text"]).lower().replace(" ", ""), "LOC")
                         for lm in tweet["location_mentions"]])
        else:
            locs.append([(remove_punct(lm["text"]).lower().replace(" ", ""), types[lm["type"]])
                         for lm in tweet["location_mentions"]])
    return locs

In [None]:
def parse_bilou(path):
    """Extract the normalized location mentions of every sentence in a BILOU file.

    ``U-`` tokens form a mention on their own; a ``B-`` token opens a
    multi-token mention, ``I-`` tokens extend it and an ``L-`` token closes
    it. Malformed sequences (``I-``/``L-`` without an open mention, or an open
    mention interrupted by any other label) are dropped.

    Returns:
        A list with one entry per sentence; each entry is the list of mention
        strings after punctuation removal, lower-casing and space removal.
    """
    tokens, labels = read_bilou(path)

    locs = []
    for sent_tokens, sent_labels in zip(tokens, labels):
        pending = ""   # text of the multi-token mention currently being built
        mentions = []
        for tok, lab in zip(sent_tokens, sent_labels):
            if "U-" in lab:
                mentions.append(remove_punct(tok).lower().replace(" ", ""))
                pending = ""
            elif "B-" in lab:
                # a new B- silently discards any mention left unfinished
                pending = tok
            elif "I-" in lab:
                # an I- with nothing open is malformed and skipped
                if pending != "":
                    pending = pending + " " + tok
            elif "L-" in lab:
                # an L- with nothing open is malformed and skipped
                if pending != "":
                    pending = pending + " " + tok
                    mentions.append(remove_punct(pending).lower().replace(" ", ""))
                    pending = ""
            else:
                # any other label abandons an unfinished mention
                pending = ""
        locs.append(mentions)
    return locs

def parse_bilou_by_type(path):
    """Extract ``(mention, type)`` pairs for every sentence in a BILOU file.

    Same span rules as ``parse_bilou``. The type is the part after the first
    ``-`` of the closing label: the ``U-`` label for single-token mentions,
    the ``L-`` label for multi-token ones.

    Returns:
        A list with one entry per sentence; each entry is a list of
        ``(normalized_text, type)`` tuples.
    """
    tokens, labels = read_bilou(path)

    locs = []
    for sent_tokens, sent_labels in zip(tokens, labels):
        pending = ""   # text of the multi-token mention currently being built
        mentions = []
        for tok, lab in zip(sent_tokens, sent_labels):
            if "U-" in lab:
                mentions.append((remove_punct(tok).lower().replace(" ", ""), lab.split("-")[1]))
                pending = ""
            elif "B-" in lab:
                # a new B- silently discards any mention left unfinished
                pending = tok
            elif "I-" in lab:
                # an I- with nothing open is malformed and skipped
                if pending != "":
                    pending = pending + " " + tok
            elif "L-" in lab:
                # an L- with nothing open is malformed and skipped
                if pending != "":
                    pending = pending + " " + tok
                    mentions.append((remove_punct(pending).lower().replace(" ", ""), lab.split("-")[1]))
                    pending = ""
            else:
                # any other label abandons an unfinished mention
                pending = ""

        locs.append(mentions)
    return locs

In [None]:
def parse_bio(path):
    """Extract the normalized location mentions of every sentence in a BIO file.

    A ``B-`` token opens a mention and ``I-`` tokens extend it; the mention is
    closed by the next ``B-`` token, by any other label, or by the end of the
    sentence. Tokens labelled with the ``PERS`` or ``ORG`` type are not
    locations and never become part of a mention. An ``I-`` token with no open
    mention is malformed and ignored.

    Returns:
        A list with one entry per sentence; each entry is the list of mention
        strings after punctuation removal, lower-casing and space removal.
    """
    tokens, labels = read_bilou(path)

    locs = []
    for sent_tokens, sent_labels in zip(tokens, labels):
        pending = ""   # text of the mention currently being built
        mentions = []

        for tok, lab in zip(sent_tokens, sent_labels):
            if lab != "O":
                parts = lab.split("-")
                if len(parts) < 2:
                    # label without a "-" separator: report it, then let the
                    # generic branch below treat it as a non-mention label
                    print(lab)
                elif parts[1] in ('PERS', 'ORG'):
                    # a non-location entity ends the pending location, so a
                    # stray I- after it cannot be glued onto the older mention
                    if pending != "":
                        mentions.append(remove_punct(pending).lower().replace(" ", ""))
                    pending = ""
                    continue

            if "B-" in lab:
                if pending != "":
                    mentions.append(remove_punct(pending).lower().replace(" ", ""))
                pending = tok
            elif "I-" in lab:
                if pending == "":  # malformed: I- without a preceding B-
                    continue
                pending += " " + tok
            else:
                if pending != "":
                    mentions.append(remove_punct(pending).lower().replace(" ", ""))
                pending = ""

        # BIO has no explicit end marker, so a mention that runs up to the
        # last token must be flushed here (it used to be silently dropped).
        if pending != "":
            mentions.append(remove_punct(pending).lower().replace(" ", ""))

        locs.append(mentions)
    return locs

def parse_bio_by_type(path):
    """Extract ``(mention, type)`` pairs for every sentence in a BIO file.

    A ``B-`` token opens a mention and fixes its type (the part after the
    first ``-`` of that label); ``I-`` tokens extend it. The mention is closed
    by the next ``B-`` token, by any other label, or by the end of the
    sentence. An ``I-`` token with no open mention is malformed and ignored.

    Returns:
        A list with one entry per sentence; each entry is a list of
        ``(normalized_text, type)`` tuples.
    """
    tokens, labels = read_bilou(path)

    locs = []
    for sent_tokens, sent_labels in zip(tokens, labels):
        pending = ""         # text of the mention currently being built
        pending_type = None  # type taken from the B- label that opened it
        mentions = []

        for tok, lab in zip(sent_tokens, sent_labels):
            if "B-" in lab:
                if pending != "":
                    mentions.append((remove_punct(pending).lower().replace(" ", ""), pending_type))
                pending = tok
                # Remember the type of THIS mention. Reading it from the label
                # that closes the mention tagged it with the next entity's type
                # and raised IndexError whenever the closing label was "O".
                pending_type = lab.split("-")[1]
            elif "I-" in lab:
                if pending == "":  # malformed: I- without a preceding B-
                    continue
                pending += " " + tok
            else:
                if pending != "":
                    mentions.append((remove_punct(pending).lower().replace(" ", ""), pending_type))
                pending = ""
                pending_type = None

        # BIO has no explicit end marker: flush a mention that reaches the
        # last token of the sentence.
        if pending != "":
            mentions.append((remove_punct(pending).lower().replace(" ", ""), pending_type))

        locs.append(mentions)
    return locs

In [None]:
def parse_bilou(path):
    """Extract the normalized location mentions of every sentence in a BILOU file.

    NOTE(review): this cell re-defines ``parse_bilou``, which an earlier cell
    already defines; when the notebook runs top to bottom this later
    definition is the one in effect. Consider deleting one of the two.

    ``U-`` tokens form a mention on their own; a ``B-`` token opens a
    multi-token mention, ``I-`` tokens extend it and an ``L-`` token closes
    it. Malformed sequences (``I-``/``L-`` without an open mention, or an open
    mention interrupted by any other label) are dropped.

    Returns:
        A list with one entry per sentence; each entry is the list of mention
        strings after punctuation removal, lower-casing and space removal.
    """
    tokens, labels = read_bilou(path)

    locs = []
    for sent_tokens, sent_labels in zip(tokens, labels):
        pending = ""   # text of the multi-token mention currently being built
        mentions = []
        for tok, lab in zip(sent_tokens, sent_labels):
            if "U-" in lab:
                mentions.append(remove_punct(tok).lower().replace(" ", ""))
                pending = ""
            elif "B-" in lab:
                # a new B- silently discards any mention left unfinished
                pending = tok
            elif "I-" in lab:
                # an I- with nothing open is malformed and skipped
                if pending != "":
                    pending = pending + " " + tok
            elif "L-" in lab:
                # an L- with nothing open is malformed and skipped
                if pending != "":
                    pending = pending + " " + tok
                    mentions.append(remove_punct(pending).lower().replace(" ", ""))
                    pending = ""
            else:
                # any other label abandons an unfinished mention
                pending = ""
        locs.append(mentions)
    return locs

def parse_bilou_by_type(path):
    """Extract ``(mention, type)`` pairs for every sentence in a BILOU file.

    NOTE(review): this cell re-defines ``parse_bilou_by_type``, which an
    earlier cell already defines; when the notebook runs top to bottom this
    later definition is the one in effect. Consider deleting one of the two.

    ``U-`` tokens form a mention on their own; ``B-`` opens a multi-token
    mention, ``I-`` extends it and ``L-`` closes it. The type is the part
    after the first ``-`` of the closing (``U-`` or ``L-``) label. Malformed
    sequences are dropped.

    Returns:
        A list with one entry per sentence; each entry is a list of
        ``(normalized_text, type)`` tuples.
    """
    tokens, labels = read_bilou(path)

    locs = []
    for sent_tokens, sent_labels in zip(tokens, labels):
        pending = ""   # text of the multi-token mention currently being built
        mentions = []
        for tok, lab in zip(sent_tokens, sent_labels):
            if "U-" in lab:
                mentions.append((remove_punct(tok).lower().replace(" ", ""), lab.split("-")[1]))
                pending = ""
            elif "B-" in lab:
                # a new B- silently discards any mention left unfinished
                pending = tok
            elif "I-" in lab:
                # an I- with nothing open is malformed and skipped
                if pending != "":
                    pending = pending + " " + tok
            elif "L-" in lab:
                # an L- with nothing open is malformed and skipped
                if pending != "":
                    pending = pending + " " + tok
                    mentions.append((remove_punct(pending).lower().replace(" ", ""), lab.split("-")[1]))
                    pending = ""
            else:
                # any other label abandons an unfinished mention
                pending = ""

        locs.append(mentions)
    return locs

In [None]:
def pad_unpredicted_seq(gl, pl):
    """Pad each predicted sequence with ``"O"`` up to the length of its gold sequence.

    Args:
        gl: list of gold sequences.
        pl: list of predicted sequences (lists); padded in place.

    Returns:
        ``pl`` itself, after padding. Predictions that are already at least as
        long as their gold sequence are left untouched.

    Raises:
        NameError: if ``gl`` and ``pl`` contain different numbers of sequences.
    """
    if len(gl) != len(pl):
        raise NameError("Found input variables with inconsistent numbers of samples! len(gold) = %d and len(pred) = %d" % (len(gl), len(pl)))

    for gold_seq, pred_seq in zip(gl, pl):
        missing = len(gold_seq) - len(pred_seq)
        if missing > 0:
            pred_seq.extend(["O"] * missing)
    return pl

In [None]:
def extract_matrix_per_example(gold, pred):
    """Split every example's mentions into true positives, false positives and false negatives.

    Matching is done per example and respects multiplicity: a predicted
    mention is a true positive only while an unmatched equal gold mention is
    left; any further copy is a false positive. Gold mentions that stay
    unmatched are the false negatives.

    Args:
        gold: list (one entry per example) of lists of gold mentions.
        pred: list (one entry per example) of lists of predicted mentions.
            Mentions are compared with ``==``.

    Returns:
        A tuple ``(TPs, FPs, FNs)`` of lists, each with one list per example.
        TP and FP items keep prediction order; FN items keep gold order.
    """
    TPs = []
    FPs = []
    FNs = []

    for i in range(len(gold)):
        TP = []
        FP = []
        # Gold mentions not yet consumed by a prediction; whatever is left at
        # the end is exactly the set of false negatives.
        unmatched = list(gold[i])

        for l in pred[i]:
            if l in unmatched:
                unmatched.remove(l)  # consume one gold occurrence
                TP.append(l)
            else:
                # either absent from gold or predicted more often than annotated
                FP.append(l)

        TPs.append(TP)
        FPs.append(FP)
        # The previous FN loop compared two counts that were both zero and
        # therefore never recorded a false negative.
        FNs.append(unmatched)

    return TPs, FPs, FNs

def count_matrix_per_example(gold, pred):
    """Return the per-example counts of true positives, false positives and false negatives.

    Returns:
        A tuple of three lists ``(TP counts, FP counts, FN counts)``, each with
        one integer per example, derived from ``extract_matrix_per_example``.
    """
    per_example = extract_matrix_per_example(gold, pred)
    tp_counts, fp_counts, fn_counts = (
        [len(items) for items in group] for group in per_example
    )
    return tp_counts, fp_counts, fn_counts

In [None]:
def compute_precision(gold, pred, mode, internal=False):
    """Compute per-example precision and, by default, its mean over all examples.

    Precision of an example is ``TP / len(pred[i])``, or 0 when nothing was
    predicted. In ``'special'`` mode an example with neither gold nor
    predicted mentions scores 1.0 instead.

    Args:
        gold: list (one entry per example) of lists of gold mentions.
        pred: list (one entry per example) of lists of predicted mentions.
        mode: ``'special'`` enables the empty-example rule; any other value
            gives the plain computation.
        internal: when True return the list of per-example precisions rather
            than their mean.

    Raises:
        NameError: if ``gold`` and ``pred`` differ in length.
        ZeroDivisionError: if there are no examples and ``internal`` is False.
    """
    if len(gold) != len(pred):
        raise NameError("Found input variables with inconsistent numbers of samples! len(gold) = %d and len(pred) = %d"
                        % (len(gold), len(pred)))

    # FIXME count this ONCE for all eval measures!
    tp_counts, _, _ = count_matrix_per_example(gold, pred)
    special = (mode == 'special')

    ps = []
    for i, tp in enumerate(tp_counts):
        # nothing annotated and nothing predicted counts as a perfect example
        if special and not set(gold[i] + pred[i]):
            ps.append(1.00)
            continue
        den = len(pred[i])  # FIXME can be replaced with TPs[i] + FPs[i]
        ps.append(tp / den if den > 0 else 0)

    if internal:
        return ps
    return sum(ps) / len(ps)

def compute_recall(gold, pred, mode, internal=False):
    """Compute per-example recall and, by default, its mean over all examples.

    Recall of an example is ``TP / len(gold[i])``, or 0 when the example has
    no gold mentions. In ``'special'`` mode an example with neither gold nor
    predicted mentions scores 1.0 instead.

    Args:
        gold: list (one entry per example) of lists of gold mentions.
        pred: list (one entry per example) of lists of predicted mentions.
        mode: ``'special'`` enables the empty-example rule; any other value
            gives the plain computation.
        internal: when True return the list of per-example recalls rather
            than their mean.

    Raises:
        NameError: if ``gold`` and ``pred`` differ in length.
        ZeroDivisionError: if there are no examples and ``internal`` is False.
    """
    if len(gold) != len(pred):
        raise NameError("Found input variables with inconsistent numbers of samples! len(gold) = %d and len(pred) = %d"
                        % (len(gold), len(pred)))

    tp_counts, _, _ = count_matrix_per_example(gold, pred)
    special = (mode == 'special')

    rs = []
    for i, tp in enumerate(tp_counts):
        # nothing annotated and nothing predicted counts as a perfect example
        if special and not set(gold[i] + pred[i]):
            rs.append(1.00)
            continue
        den = len(gold[i])  # FIXME can be replaced with TPs[i] + FNs[i]
        rs.append(tp / den if den > 0 else 0)

    if internal:
        return rs
    return sum(rs) / len(rs)

def compute_fscore(beta, gold, pred, mode):
    """Compute the mean per-example F-beta score.

    For each example ``F = (1 + beta^2) * P * R / (beta^2 * P + R)`` using the
    per-example precision ``P`` and recall ``R``; the score is 0 when the
    denominator is 0.

    Args:
        beta: weight of recall relative to precision.
        gold: list (one entry per example) of lists of gold mentions.
        pred: list (one entry per example) of lists of predicted mentions.
        mode: forwarded to ``compute_precision`` / ``compute_recall``.

    Raises:
        NameError: if ``gold`` and ``pred`` differ in length.
        ZeroDivisionError: if there are no examples.
    """
    ps = compute_precision(gold, pred, mode, True)
    rs = compute_recall(gold, pred, mode, True)

    beta2 = beta ** 2  # loop-invariant
    fs = []
    for p, r in zip(ps, rs):
        den = beta2 * p + r
        # Guard on the real denominator: for beta != 0 this is the same as
        # p + r == 0, and it also covers beta == 0 with r == 0, which used to
        # divide by zero.
        if den == 0:
            f = 0.00
        else:
            f = ((1 + beta2) * p * r) / den
        fs.append(f)
    return sum(fs) / len(fs)


def count_special(gold, pred):
    """Count the examples that have neither gold nor predicted mentions."""
    return sum(
        1
        for i in range(len(gold))
        if not set(gold[i] + pred[i])
    )

## Typeless evaluation

In [None]:
def evaluate(gold_path, gold_file_format, pred_path, pred_file_format, beta, mode, per_dataset):
    """Type-agnostic evaluation driver: parse both files and score them.

    Args:
        gold_path: path of the gold-standard file.
        gold_file_format: ``"bilou"``, ``"bio"``; any other value is read as
            JSON lines.
        pred_path: path of the prediction file.
        pred_file_format: same choices as ``gold_file_format``.
        beta: beta of the F-beta score.
        mode: scoring mode forwarded to the metric functions
            (``'special'`` or anything else for the plain computation).
        per_dataset: when truthy all examples are merged into one single
            example before scoring, instead of averaging per-example scores.

    Returns:
        A tuple ``(precision, recall, f_score)``.
    """
    def load(path, file_format):
        # unknown formats fall through to the JSON-lines parser
        if file_format == "bilou":
            return parse_bilou(path)
        if file_format == "bio":
            return parse_bio(path)
        return parse_json(path)

    gold = load(gold_path, gold_file_format)
    pred = load(pred_path, pred_file_format)

    if per_dataset:
        # collapse the whole dataset into one big example
        gold = [[mention for example in gold for mention in example]]
        pred = [[mention for example in pred for mention in example]]

    precision = compute_precision(gold, pred, mode)
    recall = compute_recall(gold, pred, mode)
    f_score = compute_fscore(beta, gold, pred, mode)
    return precision, recall, f_score



## Type-based evaluation

In [None]:
def filter_by_type(annot, typ):
    """Keep, per example, only the annotations whose second field equals ``typ``.

    Args:
        annot: list (one entry per example) of lists of indexable annotations,
            e.g. ``(text, type)`` tuples.
        typ: the type value to keep.

    Returns:
        A list with the same number of examples as ``annot``; examples with no
        matching annotation become empty lists.
    """
    return [
        [item for item in example if item[1] == typ]
        for example in annot
    ]

In [None]:
def filter_by_type2(annot, typ):
    """Wrap each annotation of a flat list in its own list, keeping only type ``typ``.

    Args:
        annot: flat list of indexable annotations, e.g. ``(text, type)`` tuples.
        typ: the type value to keep.

    Returns:
        A list as long as ``annot``: ``[item]`` where the item's second field
        equals ``typ``, otherwise ``[]``.
    """
    return [[item] if item[1] == typ else [] for item in annot]

In [None]:
def evaluate_by_type(gold_path, gold_file_format, pred_path, pred_file_format, beta, LOC, average, e_method):
    """Type-aware evaluation driver: score each location type, then macro-average.

    Args:
        gold_path: path of the gold-standard file.
        gold_file_format: ``"bilou"``; any other value is read as JSON lines.
        pred_path: path of the prediction file.
        pred_file_format: same choices as ``gold_file_format``.
        beta: beta of the F-beta score.
        LOC: forwarded to ``parse_json_by_type`` (truthy collapses every type
            into the single tag ``"LOC"``); unused for BILOU input.
        average: ``"macro"`` averages the per-type scores. Any other value is
            NOT implemented and yields ``(0, 0, 0)``.
        e_method: scoring mode forwarded to the metric functions.

    Returns:
        A tuple ``(precision, recall, f_score)``.

    Raises:
        ZeroDivisionError: with ``average == "macro"`` when neither file
            contains any mention (there is no type to average over).
    """
    if gold_file_format == "bilou":
        gold = parse_bilou_by_type(gold_path)
    else:
        gold = parse_json_by_type(gold_path, LOC)

    if pred_file_format == "bilou":
        pred = parse_bilou_by_type(pred_path)
    else:
        pred = parse_json_by_type(pred_path, LOC)

    # every type seen on either side gets evaluated
    f_gold = {mention[1] for example in gold for mention in example}
    f_pred = {mention[1] for example in pred for mention in example}
    types_to_eval = f_gold | f_pred

    if average != "macro":
        # FIXME micro averaging (the default) is not implemented; the special
        # case is not handled here either.
        return 0, 0, 0

    ps = []  # one entry per type
    rs = []
    fs = []
    for typ in types_to_eval:
        gold_typ = filter_by_type(gold, typ)
        pred_typ = filter_by_type(pred, typ)
        ps.append(compute_precision(gold_typ, pred_typ, e_method))
        rs.append(compute_recall(gold_typ, pred_typ, e_method))
        fs.append(compute_fscore(beta, gold_typ, pred_typ, e_method))

    return sum(ps) / len(ps), sum(rs) / len(rs), sum(fs) / len(fs)


In [None]:
# Typeless evaluation of the CRF baseline predictions for every event.
path = "..."  # placeholder: root directory of the data, to be filled in
e_method = 'special'# or 'standard'
beta = 1
per_dataset = False

for typ in ['typeless', 'typebased']:
    for event in en_events:
        pp = path + "baselines/random/" + typ + "/CRF-" + event + ".txt"  # predictions
        gp = path + "/random/" + typ + "/" + event + "/test.txt"          # gold
        # pass the configured beta rather than a hard-coded 1
        pm, rm, fm = evaluate(gp, "bilou", pp, "bilou", beta, e_method, per_dataset)
        # `case` was never defined (NameError on a fresh kernel); the second
        # column reports the loop's data variant instead.
        print("%s\t%s\t%f\t%f\t%f" % (event, typ, pm, rm, fm))


In [None]:
# Type-based (macro-averaged) evaluation of the CRF baseline predictions.
path = "..."  # placeholder: root directory of the data, to be filled in
e_method = 'special'# or 'standard'
LOC = False
average = 'macro'
beta = 1

for typ in ['typeless', 'typebased']:
    for event in en_events:
        pp = path + "baselines/random/" + typ + "/CRF-" + event + ".txt"  # predictions
        gp = path + "/random/" + typ + "/" + event + "/test.txt"          # gold
        pm, rm, fm = evaluate_by_type(gp, "json", pp, "bilou", beta, LOC, average, e_method)
        # `case` was never defined (NameError on a fresh kernel); the second
        # column reports the loop's data variant instead.
        print("%s\t%s\t%f\t%f\t%f" % (event, typ, pm, rm, fm))

