## Читаем данные

In [12]:
import pandas as pd

In [15]:
# GAP validation split, read as a tab-separated file straight from the dataset repository.
df = pd.read_csv(
    'https://raw.githubusercontent.com/google-research-datasets/gap-coreference/master/gap-validation.tsv',
    sep='\t',
)
# Peek at the first three rows to check the columns.
df.head(n=3)

Unnamed: 0,ID,Text,Pronoun,Pronoun-offset,A,A-offset,A-coref,B,B-offset,B-coref,URL
0,validation-1,He admitted making four trips to China and pla...,him,256,Jose de Venecia Jr,208,False,Abalos,241,False,http://en.wikipedia.org/wiki/Commission_on_Ele...
1,validation-2,"Kathleen Nott was born in Camberwell, London. ...",She,185,Ellen,110,False,Kathleen,150,True,http://en.wikipedia.org/wiki/Kathleen_Nott
2,validation-3,"When she returns to her hotel room, a Liberian...",his,435,Jason Scott Lee,383,False,Danny,406,True,http://en.wikipedia.org/wiki/Hawaii_Five-0_(20...


## Оставляем только первые 100 примеров

In [93]:
# Work on an independent copy of the first 100 rows so the loaded frame stays untouched.
test_df = df.head(100).copy()

## Получаем предсказанные кластеры

In [94]:
"""
clusters: List[List[List[str]]]
for example, for validation-2 it will look roughly like this:

clusters[1] -> [['Kathleen Nott', 'Her', 'her', 'Kathleen', 'She'], 
                ['London', 'London', 'London']]
"""

# Placeholder: replace the Ellipsis with the output of your coreference model.
# The outer list is indexed like the rows of test_df (clusters[1] belongs to
# validation-2, i.e. row 1); each inner list is one cluster of mention strings.
clusters = ... # your code here

# Store the predictions next to the gold annotations, one list of clusters per row.
test_df['clusters'] = clusters

## Сравниваем предсказания с золотым стандартом

Считаем true positives (tp), false positives (fp), true negatives (tn) и false negatives (fn), чтобы потом можно было посчитать метрики.

In [227]:
def compare(pronoun, a, a_value, b, b_value, clusters):
    """Score one example's gold annotations against predicted clusters.

    Two gold pairs are evaluated: ``[pronoun, a]`` and ``[pronoun, b]``.
    A pair is considered predicted-coreferent when at least one cluster
    contains both of its strings (exact, case-sensitive membership test).

    Args:
        pronoun: Pronoun string of the example.
        a: First candidate antecedent string.
        a_value: Truthy if the gold annotation links ``a`` to the pronoun.
        b: Second candidate antecedent string.
        b_value: Truthy if the gold annotation links ``b`` to the pronoun.
        clusters: List of predicted clusters, each a list of mention strings.

    Returns:
        A tuple ``(tp_clusters, fp_clusters, tn_clusters, fn_clusters)``:

        * ``tp_clusters`` - for every gold-positive pair found together,
          the first predicted cluster containing both strings;
        * ``fp_clusters`` - the same for gold-negative pairs;
        * ``tn_clusters`` - gold-negative ``[pronoun, candidate]`` pairs
          that no cluster joins;
        * ``fn_clusters`` - gold-positive ``[pronoun, candidate]`` pairs
          that no cluster joins.

        Every gold pair contributes exactly one item to exactly one of the
        four lists, so the lengths always sum to 2.
    """
    tp_clusters, fp_clusters, tn_clusters, fn_clusters = [], [], [], []

    for candidate, is_coreferent in ((a, a_value), (b, b_value)):
        # Matching is done on strings, and the same string (e.g. "her") may
        # occur in several clusters. Take only the first joint cluster so a
        # single gold pair is never counted more than once.
        joint_cluster = next(
            (c for c in clusters if pronoun in c and candidate in c),
            None,
        )
        predicted_together = joint_cluster is not None

        if is_coreferent:
            if predicted_together:
                tp_clusters.append(joint_cluster)
            else:
                fn_clusters.append([pronoun, candidate])
        else:
            if predicted_together:
                fp_clusters.append(joint_cluster)
            else:
                tn_clusters.append([pronoun, candidate])

    return tp_clusters, fp_clusters, tn_clusters, fn_clusters

In [228]:
# Score every example row; each element of `res` is the (tp, fp, tn, fn) tuple from compare().
res = test_df.apply(
    lambda row: compare(
        row['Pronoun'],
        row['A'],
        row['A-coref'],
        row['B'],
        row['B-coref'],
        row['clusters'],
    ),
    axis=1,
).values

Заодно перепишем списки в строки, чтобы на них можно было удобно смотреть глазами при экспорте в файл

In [233]:
def clusters_to_string(clusters):
    """Flatten a list of string groups into one readable line.

    Items inside a group are joined with ", " and the groups themselves
    are joined with "; ".
    """
    rendered_groups = []
    for group in clusters:
        rendered_groups.append(', '.join(group))
    return '; '.join(rendered_groups)

In [236]:
# For each outcome category add three columns: the raw items, how many there
# are, and a printable string version of them.
for position, label in enumerate(['tp', 'fp', 'tn', 'fn']):
    test_df[label] = [row_result[position] for row_result in res]
    test_df[f'{label}_count'] = test_df[label].apply(len)
    test_df[f'str_{label}'] = test_df[label].apply(clusters_to_string)

In [240]:
# Printable version of the predicted clusters, for viewing and export.
test_df['str_clusters'] = test_df['clusters'].apply(clusters_to_string)

Посмотрим на результаты

In [237]:
# Gold annotations side by side with the per-example outcome counts.
test_df.loc[
    :,
    [
        'Pronoun', 'A', 'A-coref', 'B', 'B-coref',
        'tp_count', 'fp_count', 'tn_count', 'fn_count',
    ],
]

Unnamed: 0,Pronoun,A,A-coref,B,B-coref,tp_count,fp_count,tn_count,fn_count
0,him,Jose de Venecia Jr,False,Abalos,False,0,0,2,0
1,She,Ellen,False,Kathleen,True,1,0,1,0
2,his,Jason Scott Lee,False,Danny,True,1,0,1,0
3,he,Reucassel,True,Debnam,False,0,0,1,1
4,she,Finch Hatton,False,Beryl Markham,True,0,0,1,1
...,...,...,...,...,...,...,...,...,...
95,he,Fred Ziffel,False,Drucker,True,1,0,1,0
96,her,Seema,False,Shalini,False,0,1,1,0
97,she,Branton,False,Heloise,False,0,0,2,0
98,his,Hibbert,True,Christopher Robin,False,1,0,1,0


In [238]:
# Number of examples with at least one false positive or false negative.
int((test_df['fp_count'] + test_df['fn_count']).gt(0).sum())

40

In [239]:
# Number of examples where tp_count and tn_count add up to exactly 2.
int((test_df['tp_count'] + test_df['tn_count']).eq(2).sum())

60

Посмотрим на ошибки (false positives + false negatives)

In [243]:
# Keep only the examples that contain at least one false positive or false negative.
falses = test_df.loc[(test_df['fp_count'] + test_df['fn_count']).gt(0)]
falses.loc[
    :,
    [
        'Pronoun', 'A', 'A-coref', 'B', 'B-coref',
        'str_clusters', 'str_fp', 'str_fn', 'str_tp', 'str_tn',
    ],
]

Unnamed: 0,Pronoun,A,A-coref,B,B-coref,str_clusters,str_fp,str_fn,str_tp,str_tn
3,he,Reucassel,True,Debnam,False,"the then opposition leader Peter Debnam, Debna...",,"he, Reucassel",,"he, Debnam"
4,she,Finch Hatton,False,Beryl Markham,True,"Karen Blixen, her, her; her husband, Finch Hat...",,"she, Beryl Markham",,"she, Finch Hatton"
5,he,James Randi,False,Jos* Alvarez,True,"stage performer Jos * Alvarez, he",,"he, Jos* Alvarez",,"he, James Randi"
7,his,Colin,False,Jake Burns,True,"He, He, he, Colin; a singer and guitar player ...",,"his, Jake Burns",,"his, Colin"
8,he,Scott,False,Cowan,True,"F . Scott Fitzgerald ' s, Fitzgerald, his, Sco...","F . Scott Fitzgerald ' s, Fitzgerald, his, Sco...","he, Cowan",,
9,her,Beverley Callard,True,Liz,False,"her, Liz, her, Beverley Callard, Liz; his, Jim","her, Liz, her, Beverley Callard, Liz",,"her, Liz, her, Beverley Callard, Liz",
12,her,Queen,True,Crystal,False,"Princess Luminous, her, her, the person who ki...",,"her, Queen",,"her, Crystal"
13,his,Dan Dailey,False,Michael Kidd,True,"dancer / choreographer Michael Kidd, his",,"his, Michael Kidd",,"his, Dan Dailey"
18,his,Dwight,False,Andy,True,"Dwight ' s, Dwight ' s, Dwight; Andy, Andy, An...",,"his, Andy",,"his, Dwight"
19,his,Morris,False,David W. Taylor,True,"Rear Admiral David W . Taylor, his",,"his, David W. Taylor",,"his, Morris"


## Выгружаем в файл, который можно загрузить в гугл-таблицы

In [246]:
# Columns written to the spreadsheet-friendly file: source text and gold
# annotations, printable clusters/outcomes, then the outcome counts.
export = [
    'Text', 'Pronoun', 'A', 'A-coref', 'B', 'B-coref',
    'str_clusters', 'str_fp', 'str_fn', 'str_tp', 'str_tn',
    'tp_count', 'fp_count', 'tn_count', 'fn_count',
]
export_df = test_df.loc[:, export]
# Tab-separated output; the index is written as the first column.
export_df.to_csv('results.tsv', sep='\t')

*Анна Полянская, 2020*