In [129]:
# Insert code here.
!pip install dill
!pip install nltk
!pip install flair
!pip install networkx



In [130]:
# Insert code here.
import dill
import pickle 
import collections
import math

from globalfn.annotations import all_annotations
from globalfn.annotations import annotation
from flair.data import Sentence
import networkx as nx

import nltk
nltk.download('framenet_v17')
from nltk.corpus import framenet as fn

[nltk_data] Downloading package framenet_v17 to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package framenet_v17 is already up-to-date!


In [131]:
# get all frames
frames = set()
for frame in fn.frames():
    frames.add(frame.name)

# # check annotations
# for ID, annos in all_annotations('en').items():
#     for anno in annos:
#         assert anno.frameName in frames

# # check baseline
# baseline_res = pickle.load(open("/home/jovyan/work/saved/results/de_ID_to_frames.pkl", 'rb'))
# for res in baseline_res.values():
#     for word_frame in res:
#         if word_frame:
#             _, frame = word_frame
#             assert frame in frames

# # check modification_2
# mod_2 = pickle.load(open('/home/jovyan/work/saved/results/modification_2/de_en_ID_to_frames.pkl', 'rb'))
# for res in mod_2.values():
#     for word_source_target_f in res:
#         _, _, frame = word_source_target_f
#         assert frame in frames

In [132]:
def evaluate(pred, true, frames=frames, show_individual=False):
    sum_prec = count_prec = 0
    sum_recall = count_recall = 0
    sum_f1 = count_f1 = 0

    for ID in true.keys():
        tp = fp = tn = fn = 0
        for true_frame in true[ID]:
            if true_frame in pred[ID]:
                tp += 1
            else:
                fn += 1
        
        for pred_frame in pred[ID]:
            if pred_frame not in true[ID]:
                fp += 1
        
        tn = len(frames) - len(true[ID]) - fp
        if tp + fp == 0:
            prec = "n/a" # there were no positive cases in the input data
        else:
            prec = tp / (tp + fp)
            sum_prec += prec
            count_prec += 1

        if tp + fn == 0:
            recall = "n/a"
        else:
            recall = tp / (tp + fn)  # all instances were predicted as negative
            sum_recall += recall
            count_recall += 1

        if prec == "n/a" or recall == "n/a":
            f1 = "n/a"
        elif prec == 0 and recall == 0:
            f1 = 0
        else:
            f1 = 2*(prec*recall)/(prec + recall)
            sum_f1 += f1
            count_f1 += 1
        
        if show_individual:
            print(f"Precision:{prec}\tRecall:{recall}\tF1:{f1}")
    
    print("-------------------")
    print(f"Avg Precision: {sum_prec/count_prec:3f}\nAvg Recall: {sum_recall/count_recall:3f}\nF1: {sum_f1/count_f1:3f}")
    print("-------------------")

In [133]:
def load_true_frames(lang):
    true = collections.defaultdict(set)
    for ID, annos in all_annotations(lang).items():
        for anno in annos:
            true[ID].add(anno.frameName)
    return true

def load_baseline_frames(lang):
    baseline_res = pickle.load(open(f"/home/jovyan/work/saved/results/{lang}_ID_to_frames.pkl", 'rb'))
    pred = collections.defaultdict(set)
    for ID, res in baseline_res.items():
        for word_frame in res:
            if word_frame:
                _, frame = word_frame
                pred[ID].add(frame)
    return pred

def load_mod_2(source, target):
    mod_2 = pickle.load(open(f'/home/jovyan/work/saved/results/modification_2/{source}_{target}_ID_to_frames.pkl', 'rb'))
    pred = collections.defaultdict(set)
    for IDs, res in mod_2.items():
        _, target_ID = IDs
        for word_source_target_f in res:
            _, _, frame = word_source_target_f
            pred[target_ID].add(frame)
    return pred

In [134]:
lang = "en"
print(f"Language: {lang}")
true, pred = load_true_frames(lang), load_baseline_frames(lang)
evaluate(pred, true)

lang = "de"
print(f"Language: {lang}")
true, pred = load_true_frames(lang), load_baseline_frames(lang)
evaluate(pred, true)

lang = "pt"
print(f"Language: {lang}")
true, pred = load_true_frames(lang), load_baseline_frames(lang)
evaluate(pred, true)

Language: en
-------------------
Avg Precision: 0.109412
Avg Recall: 0.091873
F1: 0.312200
-------------------
Language: de
-------------------
Avg Precision: 0.096429
Avg Recall: 0.075077
F1: 0.320276
-------------------
Language: pt
-------------------
Avg Precision: 0.118178
Avg Recall: 0.092820
F1: 0.260907
-------------------


In [295]:
source = 'de'
target = 'en'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
evaluate(pred, true)

source = 'pt'
target = 'en'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
evaluate(pred, true)

source = 'en'
target = 'de'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
evaluate(pred, true)

source = 'en'
target = 'pt'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
evaluate(pred, true)

From de to en
-------------------
Avg Precision: 0.494444
Avg Recall: 0.029318
F1: 0.412082
-------------------
From pt to en
-------------------
Avg Precision: 0.478632
Avg Recall: 0.111685
F1: 0.469558
-------------------
From en to de
-------------------
Avg Precision: 0.446429
Avg Recall: 0.098714
F1: 0.446825
-------------------
From en to pt
-------------------
Avg Precision: 0.645631
Avg Recall: 0.087745
F1: 0.402580
-------------------


In [296]:
set_fr = set()
G = nx.DiGraph()
for frame in fn.frames():
    if not frame.frameRelations:
        G.add_node(frame.name)
    for fr in frame.frameRelations:
        set_fr.add(fr.type.name)
        if fr.type.name != 'ReFraming_Mapping':
            G.add_edge(fr.superFrame.name, fr.subFrame.name, fr=fr.type.name)
            assert "superFrame" in fr.keys()
            assert "subFrame" in fr.keys()
            assert "name" in fr.type.keys()
        else:
            G.add_node(frame.name)
set_fr

{'Causative_of',
 'Inchoative_of',
 'Inheritance',
 'Metaphor',
 'Perspective_on',
 'Precedes',
 'ReFraming_Mapping',
 'See_also',
 'Subframe',
 'Using'}

In [297]:
# # visualize FrameNet
# pos = nx.spring_layout(G)
# nx.draw(G, pos=pos, node_size=60,font_size=8) 

In [303]:
def helper(target, source_set, memo, G_reverse):
    if target in memo:
        return memo[target]
    if target in source_set:
        memo[target] = 1.0
        return memo[target]
    
    res = 0
    for prev in G_reverse[target]:
        tmp = helper(prev, source_set, memo, G_reverse)
        res += tmp
    res = (1-math.exp(5*(-res))) / (1+math.exp(-res))  # formula from the paper
    memo[target] = res
    return res


def graded_FSEM_for_ID(ID, pred_set, true_set, G):
    if not pred_set or not true_set:
        return 0

    FSEM = {}
    subG_dict = collections.defaultdict(set)  # target -> source
    for source in pred_set:
        for target in true_set:
            if source == "Observable_body_parts":
                source = "Body_parts"

            assert source in G and target in G
            if not nx.has_path(G, source, target):
                FSEM[(source, target)] = 0.0
            elif source == target:
                FSEM[(source, target)] = 1.0
            else:
                path = list(nx.shortest_path(G, source, target))[::-1]  # the edges are from target nodes (true) to source nodes (pred)
                for i in range(len(path) - 1):
                    subG_dict[path[i]].add(path[i + 1])

    if subG_dict:
        subG = nx.DiGraph()
        for source in subG_dict.keys():
            for target in subG_dict[source]:
                subG.add_edge(source, target)

        # graph must be a DAG to compute FSEM
        assert nx.algorithms.dag.is_directed_acyclic_graph(subG)  

        # if not (nx.algorithms.dag.is_directed_acyclic_graph(subG)):
        #     print(ID)
        #     cycles = list(nx.algorithms.cycles.find_cycle(subG))
        #     print(cycles)
        #     tmp = nx.DiGraph()
        #     for source, target in cycles:
        #         tmp.add_edge(source, target, fr=G[source][target])
        #         print(source, target, G[source][target])

        #     nx.draw(tmp, with_labels = True, font_size=10, alpha=0.5)
        #     assert False

        # use recursive + memoization to compute FSEM for the target node.
        memo = {}
        for target in true_set:
            if target in subG:
                FSEM[target] = helper(target, pred_set, memo, subG)
    
    # calculate the average FSEM for a sentence ID
    return sum(list(FSEM.values()))/len(FSEM.values())

def graded_FSEM(pred, true, G):
    FSEM_sum = count = 0
    for ID in true.keys():
        count += 1
        FSEM_value = graded_FSEM_for_ID(ID, pred[ID], true[ID], G)
        FSEM_sum += FSEM_value
    return FSEM_sum / count

In [304]:
source = 'pt'
target = 'en'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
print(f"Graded FSEM: {graded_FSEM(pred, true, G)}")

From pt to en
Graded FSEM: 0.08496195414560603


In [305]:
lang='en'
print(f"Language: {lang}")
true, pred = load_true_frames(lang), load_baseline_frames(lang)
print(f"Graded FSEM: {graded_FSEM(pred, true, G)}")

Language: en
Graded FSEM: 0.05940877481114271


In [306]:
# baseline
lang='pt'
print(f"Language: {lang}")
true, pred = load_true_frames(lang), load_baseline_frames(lang)
print(f"Graded FSEM: {graded_FSEM(pred, true, G)}")

lang='en'
print(f"Language: {lang}")
true, pred = load_true_frames(lang), load_baseline_frames(lang)
print(f"Graded FSEM: {graded_FSEM(pred, true, G)}")

lang='de'
print(f"Language: {lang}")
true, pred = load_true_frames(lang), load_baseline_frames(lang)
print(f"Graded FSEM: {graded_FSEM(pred, true, G)}")

Language: pt
Graded FSEM: 0.05059818322170205
Language: en
Graded FSEM: 0.05940877481114271
Language: de
Graded FSEM: 0.052536262018253614


In [307]:
source = 'de'
target = 'en'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
print(f"Graded FSEM: {graded_FSEM(pred, true, G)}")

source = 'pt'
target = 'en'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
print(f"Graded FSEM: {graded_FSEM(true, pred, G)}")

source = 'en'
target = 'de'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
print(f"Graded FSEM: {graded_FSEM(pred, true, G)}")

source = 'en'
target = 'pt'
print(f"From {source} to {target}")
true, pred = load_true_frames(target), load_mod_2(source, target)
print(f"Graded FSEM: {graded_FSEM(pred, true, G)}")

From de to en
Graded FSEM: 0.01982764348763881
From pt to en
Graded FSEM: 0.15992585614605206
From en to de
Graded FSEM: 0.08031102562039096
From en to pt
Graded FSEM: 0.0720554639011797
