In [28]:
import ast

import pandas as pd
def evaluate(df: pd.DataFrame):
    """Score predicted event triples against reference triples and print a report.

    Every predicted triple is compared with the reference triples of the same
    row, and three kinds of match are recorded:

    * trigger match  - the element at index 1 equals index 1 of some reference;
    * argument match - the prediction with index 1 removed equals some
      reference with index 1 removed;
    * triple match   - the whole prediction equals some reference.

    A prediction holding a single element is treated as a bare trigger and is
    padded to ``["", trigger, ""]`` before it is compared.

    Precision divides a match count by the number of predicted triples and
    recall divides the same count by the number of reference triples. Any
    ratio whose denominator is zero (no predictions, no references, or no
    matches at all for the F1 score) is reported as 0 instead of raising
    ``ZeroDivisionError``.

    Note:
        Match counts are tallied per prediction, so the recall numerator counts
        matching predictions rather than distinct matched references.

    Args:
        df: Frame with ``event_triples`` and ``pred_event_triples`` columns,
            each cell holding a list of triples.

    Returns:
        The triggers (index 1) of the predictions whose trigger matched no
        reference in their row, in iteration order.
    """
    ref_num = sum(len(triples) for triples in df.event_triples)
    pred_num = sum(len(triples) for triples in df.pred_event_triples)

    trg_match_cnt = 0
    arg_match_cnt = 0
    triple_match_cnt = 0
    unmatched_trgs = []

    for preds, refs in zip(df.pred_event_triples, df.event_triples):
        for pred in preds:
            if len(pred) == 1:
                # Trigger-only prediction: pad with empty arguments.
                pred = ["", pred[0], ""]
            pred_trigger = pred[1]
            # Arguments are everything except the trigger slot; computed once
            # per prediction instead of once per (prediction, reference) pair.
            pred_args = pred[:1] + pred[2:]

            if any(pred_trigger == ref[1] for ref in refs):
                trg_match_cnt += 1
            else:
                unmatched_trgs.append(pred_trigger)
            if any(pred_args == ref[:1] + ref[2:] for ref in refs):
                arg_match_cnt += 1
            if any(pred == ref for ref in refs):
                triple_match_cnt += 1

    def _ratio(numerator, denominator):
        # Guard against empty prediction / reference sets.
        return numerator / denominator if denominator else 0.0

    def _f1(p, r):
        # p + r is zero exactly when nothing matched; report 0 in that case.
        score = 2 * p * r / (p + r) if (p + r) else 0.0
        return "{:.5f}".format(score)

    trg_p = _ratio(trg_match_cnt, pred_num)
    arg_p = _ratio(arg_match_cnt, pred_num)
    trp_p = _ratio(triple_match_cnt, pred_num)
    trg_r = _ratio(trg_match_cnt, ref_num)
    arg_r = _ratio(arg_match_cnt, ref_num)
    trp_r = _ratio(triple_match_cnt, ref_num)
    trg_f, arg_f, trp_f = _f1(trg_p, trg_r), _f1(arg_p, arg_r), _f1(trp_p, trp_r)

    # Imported here, as in the original cell, so the function only needs
    # tabulate when it is actually called.
    from tabulate import tabulate
    header = ["task", "Precision", "Recall", "F1"]
    rows = [
        ("Trigger Identification", trg_p, trg_r, trg_f),
        ("Argument Identification", arg_p, arg_r, arg_f),
        ("Triple Identification", trp_p, trp_r, trp_f),
    ]
    print(tabulate(rows, headers=header))
    print("ref num %d, pred num %d" % (ref_num, pred_num))
    print("trg match: %d, arg match: %d, trp match: %d" % (trg_match_cnt, arg_match_cnt, triple_match_cnt))
    return unmatched_trgs

In [15]:
# Load the arg_ner pipeline predictions and score them.
df = pd.read_csv("output/arg_ner/bert-base-chinese/pipeline_predictions.csv")
# The triple columns are stored as text; lower-case them, strip spaces and parse
# them back into nested lists. ast.literal_eval only accepts Python literals, so
# unlike eval it cannot execute code embedded in the CSV.
df['event_triples'] = df['event_triples'].apply(lambda text: ast.literal_eval(text.lower().replace(" ", "")))
df['pred_event_triples'] = df['pred_event_triples'].apply(lambda text: ast.literal_eval(text.lower().replace(" ", "")))
# Drop empty predictions so evaluate() never indexes into an empty triple.
df['pred_event_triples'] = df['pred_event_triples'].apply(lambda triples: [t for t in triples if t != []])
evaluate(df)

task                       Precision    Recall       F1
-----------------------  -----------  --------  -------
Trigger Identification      0.704233  0.700626  0.70243
Argument Identification     0.482838  0.480364  0.4816
Triple Identification       0.403175  0.40111   0.40214
ref num 7028, pred num 6992
trg match: 4924, arg match: 3376, trp match: 2819


In [30]:
# Load the arg_ner argument-only predictions and score them.
df = pd.read_csv("output/arg_ner/bert-base-chinese/arg_predictions.csv")
# The triple columns are stored as text; lower-case them, strip spaces and parse
# them back into nested lists. ast.literal_eval only accepts Python literals, so
# unlike eval it cannot execute code embedded in the CSV.
df['event_triples'] = df['event_triples'].apply(lambda text: ast.literal_eval(text.lower().replace(" ", "")))
df['pred_event_triples'] = df['pred_event_triples'].apply(lambda text: ast.literal_eval(text.lower().replace(" ", "")))
# Drop empty predictions so evaluate() never indexes into an empty triple.
df['pred_event_triples'] = df['pred_event_triples'].apply(lambda triples: [t for t in triples if t != []])
# Bind the return value so the notebook does not echo it as cell output.
_ = evaluate(df)

task                       Precision    Recall       F1
-----------------------  -----------  --------  -------
Trigger Identification      0.993959  0.993959  0.99396
Argument Identification     0.325902  0.325902  0.3259
Triple Identification       0.322307  0.322307  0.32231
ref num 6953, pred num 6953
trg match: 6911, arg match: 2266, trp match: 2241


In [32]:
# Print the value bound to `_` (assigned from evaluate(df) in the cell above).
# NOTE(review): IPython also rebinds `_` to the last cell output, so this is
# fragile when cells are re-run in a different order.
print(_)

None


In [16]:
# Load the arg_qa pipeline predictions and score them.
df = pd.read_csv("output/arg_qa/bert-base-chinese/pipeline_predictions.csv")
# The triple columns are stored as text; lower-case them, strip spaces and parse
# them back into nested lists. ast.literal_eval only accepts Python literals, so
# unlike eval it cannot execute code embedded in the CSV.
df['event_triples'] = df['event_triples'].apply(lambda text: ast.literal_eval(text.lower().replace(" ", "")))
df['pred_event_triples'] = df['pred_event_triples'].apply(lambda text: ast.literal_eval(text.lower().replace(" ", "")))
# Drop empty predictions so evaluate() never indexes into an empty triple.
df['pred_event_triples'] = df['pred_event_triples'].apply(lambda triples: [t for t in triples if t != []])
evaluate(df)

task                       Precision    Recall       F1
-----------------------  -----------  --------  -------
Trigger Identification      0.740332  0.679738  0.70874
Argument Identification     0.622612  0.571653  0.59605
Triple Identification       0.420562  0.38614   0.40262
ref num 7013, pred num 6439
trg match: 4767, arg match: 4009, trp match: 2708


In [84]:
# Print the precision / recall / F1 table and the raw counts.
# NOTE(review): trg_p, trg_r, trg_f, arg_*, trp_*, ref_num, pred_num and the
# *_match_cnt names are not assigned at top level in any visible cell (they
# are locals of evaluate()); this cell presumably relied on leftover kernel
# state and will raise NameError on a fresh kernel - confirm or remove.
from tabulate import tabulate
header = ["task", "Precision", "Recall", "F1"]
# One row per task: (task name, precision, recall, F1).
rows = [
    ("Trigger Identification", trg_p, trg_r, trg_f),
    ("Argument Identification", arg_p, arg_r, arg_f),
    ("Triple Identification", trp_p, trp_r, trp_f),
]
print(tabulate(rows, headers=header))
print("ref num %d, pred num %d" % (ref_num, pred_num))
print("trg match: %d, arg match: %d, trp match: %d" % (trg_match_cnt, arg_match_cnt, triple_match_cnt))

task                       Precision    Recall       F1
-----------------------  -----------  --------  -------
Trigger Identification      0.892964  0.857656  0.87495
Argument Identification     0.774701  0.744069  0.75908
Triple Identification       0.625     0.600288  0.61239
ref num 1391, pred num 1336
trg match: 1193, arg match: 1035, trp match: 835


In [11]:
# Display the current predictions frame (rich repr of the whole DataFrame).
df

Unnamed: 0,title_id,title,event_triples,pred_event_triples
0,14282,新年新气象！新昌连续征收9宗地块，涉及七星、澄谭...,"[[新昌, 征收, 9宗地块], [9宗地块, 涉及, 七星、澄谭]]","[[新昌, 征收, 9宗地块], [新昌9宗地块, 涉及, 七星、澄谭]]"
1,25525,"方媛不顾疫情出街购物,惹郭富城不满当街发飙,紧皱眉头怒瞪镜头","[[方媛, 出街购物, ], [方媛, 惹不满, 郭富城], [郭富城, 发飙, ], [郭...","[[方媛, 不顾, 疫情], [方媛, 出街购物, ], [方媛, 惹不满, 郭富城], [..."
2,4594,保山市隆阳区西邑乡2022年上半年征兵工作全面启动,"[[保山市隆阳区西邑乡2022年上半年征兵工作, 启动, ]]","[[保山市隆阳区西邑乡2022上半年征兵工作, 全面启动, ]]"
3,10698,徒劳无功！威少打出劲爆数据依然被骂，美媒列数据证他是湖人毒瘤,"[[威少, 打出, 劲爆数据], [威少, 被骂, ]]","[[威少, 打出, 劲爆数据], [威少, 被骂, ], [数据, 列, 美媒], [威少,..."
4,35618,“雪容融”上“线”！玉环非遗刺绣为冬残奥会加油,"[[“雪容融”, 上“线”, ], [玉环非遗刺绣, 为加油, 冬残奥会]]","[[玉环非遗刺绣, 为加油, 冬残奥会]]"
...,...,...,...,...
4287,6265,广东省住房城乡建设厅：将尽快出台省级建筑废弃物管理条例,"[[广东省住房城乡建设厅, 将出台, 省级建筑废弃物管理条例], [广东省住房城乡建设厅, ...","[[广东省住房城乡建设厅, ：, 将尽快出台省级建筑废弃物管理条例], [广东省住房城乡建设..."
4288,11284,"飞尚科技发起成立江西省城市安全信息化学会,提升城市公共安全治理能力","[[飞尚科技, 发起成立, 江西省城市安全信息化学会], [飞尚科技, 提升, 城市公共安全...","[[飞尚科技, 发起成立, 江西省城市安全信息化学会], [飞尚科技发起, 提升, 城市公共..."
4289,38158,"被受质疑的溢涌堂;从虚假宣传被罚到模式涉嫌传销,再到被异地冻结账户!","[[溢涌堂, 虚假宣传, ], [溢涌堂, 被罚, ], [溢涌堂模式, 涉嫌传销, ], ...","[[溢涌堂, 虚假宣传, 被罚到模式], [溢涌堂, 被罚, ], [溢涌堂, 涉嫌, 传销..."
4290,860,"姐弟坠亡案：聊天记录公布,生父与小三合谋杀害子女,触目惊心","[[姐弟坠亡案聊天记录, 公布, ], [生父与小三, 合谋杀害, 子女]]","[[姐弟坠亡案聊天记录, 公布, ], [姐弟生父与小三, 合谋杀害, 子女]]"


In [42]:
# True only if the event_triples of df2 and df are equal in every row.
# NOTE(review): df2 is not created in any visible cell - confirm where it
# comes from before relying on this check.
(df2.event_triples == df.event_triples).all()

True

In [32]:
# Row labels 0..len(df)-1 at which df and df2 hold different reference triples.
ids = [
    row_label
    for row_label in range(len(df))
    if df.at[row_label, "event_triples"] != df2.at[row_label, "event_triples"]
]

In [34]:
# Show the row labelled 502 of df.
# NOTE(review): 502 is hard-coded; presumably one of the labels collected in
# `ids` - confirm.
df.loc[502]

title_id                                         7996
title                 弗爵一召唤C罗马上回归曼联！从内到外，揭秘爵爷打造无敌C罗细节
event_triples                          [[弗爵, 召唤, c罗]]
pred_event_triples                     [[弗爵, 召唤, c罗]]
Name: 502, dtype: object

In [35]:
# Show the row labelled 502 of df2, for comparison with the same label in df.
# NOTE(review): df2 is not created in any visible cell - confirm its source.
df2.loc[502]

title_id                                         4557
title                 弗爵一召唤C罗马上回归曼联！从内到外，揭秘爵爷打造无敌C罗细节
event_triples            [[弗爵, 召唤, c罗], [爵爷, 打造, c罗]]
pred_event_triples    [[弗爵, 召唤, c罗], [c罗, 打造, 爵爷, 罗]]
Name: 502, dtype: object