In [1]:
# for colab
# !pip install spacy_stanza
# !pip install ckip_transformers

# 1. import packages

In [288]:
import pandas as pd
from tqdm.notebook import tqdm
import pickle
import pathlib
import sys
from spacy.tokens import Doc

# 2. Prepare Spacy Pipeline

In [289]:
spacy_pipeline_parent_path = pathlib.Path.cwd().parent.parent.parent.parent
sys.path.append(str(spacy_pipeline_parent_path))

from spacy_pipeline import pipeline_setup
from spacy_pipeline import opinion_rule

In [341]:
methods = {
    "opinion_v0": {
        "version": "opinion_v0",
        "pattern": [
            {
                "RIGHT_ID": "OPINION_OPR_found_root",
                "RIGHT_ATTRS": {
                    "TAG": {
                        "IN": ["VE"]
                    },
                }
            },
            {
                "LEFT_ID": "OPINION_OPR_found_root",
                "REL_OP": ">",
                "RIGHT_ID": "OPINION_SRC_found_root",
                "RIGHT_ATTRS": {
                    "DEP": {
                        "IN": ["nsubj"]
                    },
                }
            },
            {
                "LEFT_ID": "OPINION_OPR_found_root",
                "REL_OP": ">",
                "RIGHT_ID": "OPINION_SEG_found_root",
                "RIGHT_ATTRS": {
                    "DEP": {
                        "IN": ["ccomp", "parataxis"]
                   },
                   "POS": {
                          "IN": ["VERB", "NOUN", "ADJ"]
                   }
                }
            }
        ]
    },
}

In [342]:
pipeline = pipeline_setup.get_opinion_pipeline(methods['opinion_v0'])
vocab = pipeline.vocab

2023-03-26 04:09:31 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.5.0.json:   0%|   …

2023-03-26 04:09:31 INFO: Loading these models for language: zh-hant (Traditional_Chinese):
| Processor | Package |
-----------------------
| tokenize  | gsd     |
| pos       | gsd     |
| lemma     | gsd     |
| depparse  | gsd     |

2023-03-26 04:09:31 INFO: Using device: cpu
2023-03-26 04:09:31 INFO: Loading: tokenize
2023-03-26 04:09:32 INFO: Loading: pos
2023-03-26 04:09:32 INFO: Loading: lemma
2023-03-26 04:09:32 INFO: Loading: depparse
2023-03-26 04:09:32 INFO: Done loading processors!


['opinion_matcher']
[1m

#   Component         Assigns   Requires   Scores   Retokenizes
-   ---------------   -------   --------   ------   -----------
0   opinion_matcher                                 False      

[38;5;2m✔ No problems found.[0m
{'summary': {'opinion_matcher': {'assigns': [], 'requires': [], 'scores': [], 'retokenizes': False}}, 'problems': {'opinion_matcher': []}, 'attrs': {}}


## 2.2 Define workflow

In [343]:
def run_work_flow(all_docs, spacy_pipeline, n_process=1):
    
    # try:
    with tqdm(total=len(all_docs)) as pbar:
        
        for paragraphs in all_docs:
                
            for i, doc in enumerate(spacy_pipeline.pipe(paragraphs, n_process=n_process)):
                # print("runing:", i)
                pass
            pbar.update(1)
            
    #         with open(f"{file_dir}/{file_name}.pkl", "wb") as f:
    #             bytes_data = [[doc.to_bytes() for doc in docs] for docs in all_docs]
    #             pickle.dump(bytes_data, f)
        
    # except:
    #     with open(f"{file_dir}/{file_name}[except].pkl", "wb") as f:
    #         bytes_data = [[doc.to_bytes() for doc in docs] for docs in all_docs]
    #         pickle.dump(bytes_data, f)
    #     pass
    return all_docs

# load all_docs

In [344]:
pickle_dir = str(pathlib.Path.cwd().parent)
pickle_file = 'label_news_200_docs_stanza.pkl'

with open(pickle_dir + '/' + pickle_file, 'rb') as f:
    bytes_data = pickle.load(f)
    all_docs_stanza = [[Doc(vocab).from_bytes(doc_bytes) for doc_bytes in docs] for docs in bytes_data]

In [345]:
all_docs_stanza[0]

[民進黨立委范雲表示將提出修法，讓女性有資格服兵役。女性是否納義務役引起各界討論，對女性服兵役是否影響未來民進黨的選情？台南市長黃偉哲今天低調表示，希望多所考量，並多徵詢各方意見。,
 黃偉哲今天參加台南市元旦升旗典禮，針對立委范雲要修法讓女性有資格服兵役一事，他受訪表示，尊重立委的提案權，有很多國家女性是不用服兵役，因為立法院是合議制，要經過多數立委同意才有辦法修法，對於范雲的主張，他表示尊重。,
 對於范雲的主張，會不會影響未來民進黨的選情？黃偉哲表示，「這個喔？無從評估！」「其實在台灣，已有女性志願役，如要改成義務役，大家還是…」黃偉哲欲言又止，他說，「希望多所考量，並多徵詢各方意見。」]

In [346]:
all_docs = run_work_flow(all_docs_stanza, pipeline, n_process=1)

  0%|          | 0/200 [00:00<?, ?it/s]

In [347]:
all_docs[-1][-1].spans

{'ckip_ner': [陳其邁, 民進黨], 'opinion_label': [陳其邁, 指出, 對於選舉的結果，民進黨將會繼續記取經驗，調整我們的腳步，不辜負支持者對我們的鞭策和期待，持續向前邁進。], 'coreference_label': [], 'opinion_found[0]': [陳其邁, 指出, 對於選舉的結果，民進黨將會繼續記取經驗，調整我們的腳步，不辜負支持者對我們的鞭策和期待, ，持續向前邁進], 'opinion_found': [陳其邁, 指出, 對於選舉的結果，民進黨將會繼續記取經驗，調整我們的腳步，不辜負支持者對我們的鞭策和期待, ，持續向前邁進]}

In [348]:
for s in all_docs[-1][-1].spans['opinion_found']:
    print(s.text, s.label_)

陳其邁 OPINION_SRC_match
指出 OPINION_OPR_match
對於選舉的結果，民進黨將會繼續記取經驗，調整我們的腳步，不辜負支持者對我們的鞭策和期待 OPINION_SEG_match
，持續向前邁進 OPINION_SEG_match


# rewrite evaluation

In [354]:
def get_precision(TP, FP):
    return TP / (TP + FP)

def get_recall(TP, FN):
    return TP / (TP + FN)

def get_f_score(precision, recall, beta=1):
    return (1 + beta**2) * precision * recall / (beta**2 * precision + recall)


def delete_around_punt_check(label_span, match_span):
    puct_list = ['，', '。', '「', '」', ' ', '！', '？', ';', ':']
    if label_span.text[0] in puct_list:
        label_span = label_span[1:]
    if label_span.text[-1] in puct_list:
        label_span = label_span[:-1]
    if match_span.text[0] in puct_list:
        match_span = match_span[1:]
    if match_span.text[-1] in puct_list:
        match_span = match_span[:-1]
    
    if label_span.start == match_span.start and label_span.end == match_span.end:
        return True
    else:
        return False
    
def interval_check(label_span, match_span):
    if label_span.start <= match_span.start and label_span.end >= match_span.end:
        return True
    else:
        return False
    
def tolerance_check(label_span, match_span, tolerance=1):
    if abs(label_span.start - match_span.start) <= tolerance and abs(label_span.end - match_span.end) <= tolerance:
        return True
    else:
        return False

label_match_dict = {
    "OPINION_SRC": "OPINION_SRC_match",
    "OPINION_OPR": "OPINION_OPR_match",
    "OPINION_SEG": "OPINION_SEG_match" 
}

In [355]:
def new_eval(all_docs, method):

    counter = {
        'OPINION_OPR_FP': 0,
        'OPINION_OPR_FN': 0,
        'OPINION_OPR_TP': 0,

        'OPINION_SRC_FP': 0,
        'OPINION_SRC_FN': 0,
        'OPINION_SRC_TP': 0,
        
        'OPINION_SEG_FP': 0,
        'OPINION_SEG_FN': 0,
        'OPINION_SEG_TP': 0,
    }

    for docs in all_docs:
        for doc in docs:

            spans = {
                'OPINION_SRC_TP': [],
                'OPINION_OPR_TP': [],
                'OPINION_SEG_TP': [],

                'OPINION_SRC_FP': [],
                'OPINION_OPR_FP': [],
                'OPINION_SEG_FP': [],

                'OPINION_SRC_FN': [],
                'OPINION_OPR_FN': [],
                'OPINION_SEG_FN': [],
            }
            
            if 'opinion_found' in doc.spans and 'opinion_label' in doc.spans:
                for found_span in doc.spans['opinion_found']:
                    found_match = False
                    for label_span in doc.spans['opinion_label']:
                        if method(label_span, found_span) and found_span.label_ == label_match_dict[label_span.label_]:
                            spans[f"{label_span.label_}_TP"].append(label_span)
                            found_match = True
                            break
                    if not found_match:
                        spans[f"{found_span.label_[:11]}_FP"].append(found_span)
                
                for label_span in doc.spans['opinion_label']:
                    if label_span not in spans[f"{label_span.label_}_TP"]:
                        spans[f"{label_span.label_}_FN"].append(label_span)

            elif 'opinion_found' in doc.spans:
                for found_span in doc.spans['opinion_found']:
                    spans[f"{found_span.label_[:11]}_FP"].append(found_span)

            elif 'opinion_label' in doc.spans:
                for label_span in doc.spans['opinion_label']:
                    spans[f"{label_span.label_}_FN"].append(label_span)

            for key in counter.keys():
                counter[key] += len(spans[key])

    return {
        'OPINION_SRC': {
            'precision': get_precision(counter['OPINION_SRC_TP'], counter['OPINION_SRC_FP']),
            'recall': get_recall(counter['OPINION_SRC_TP'], counter['OPINION_SRC_FN']),
            'f_score': get_f_score(get_precision(counter['OPINION_SRC_TP'], counter['OPINION_SRC_FP']), get_recall(counter['OPINION_SRC_TP'], counter['OPINION_SRC_FN'])),
        },
        'OPINION_OPR': {
            'precision': get_precision(counter['OPINION_OPR_TP'], counter['OPINION_OPR_FP']),
            'recall': get_recall(counter['OPINION_OPR_TP'], counter['OPINION_OPR_FN']),
            'f_score': get_f_score(get_precision(counter['OPINION_OPR_TP'], counter['OPINION_OPR_FP']), get_recall(counter['OPINION_OPR_TP'], counter['OPINION_OPR_FN'])),
        },
        'OPINION_SEG': {
            'precision': get_precision(counter['OPINION_SEG_TP'], counter['OPINION_SEG_FP']),
            'recall': get_recall(counter['OPINION_SEG_TP'], counter['OPINION_SEG_FN']),
            'f_score': get_f_score(get_precision(counter['OPINION_SEG_TP'], counter['OPINION_SEG_FP']), get_recall(counter['OPINION_SEG_TP'], counter['OPINION_SEG_FN']))
        },
        'counter': counter
    }

In [356]:
new_eval(all_docs, delete_around_punt_check)

{'OPINION_SRC': {'precision': 0.6620428751576293,
  'recall': 0.5297679112008072,
  'f_score': 0.5885650224215248},
 'OPINION_OPR': {'precision': 0.8201634877384196,
  'recall': 0.5972222222222222,
  'f_score': 0.6911595866819747},
 'OPINION_SEG': {'precision': 0.15437987857762359,
  'recall': 0.1680830972615675,
  'f_score': 0.1609403254972875},
 'counter': {'OPINION_OPR_FP': 132,
  'OPINION_OPR_FN': 406,
  'OPINION_OPR_TP': 602,
  'OPINION_SRC_FP': 268,
  'OPINION_SRC_FN': 466,
  'OPINION_SRC_TP': 525,
  'OPINION_SEG_FP': 975,
  'OPINION_SEG_FN': 881,
  'OPINION_SEG_TP': 178}}

In [357]:
new_eval(all_docs, interval_check)

{'OPINION_SRC': {'precision': 0.7679697351828499,
  'recall': 0.6011846001974334,
  'f_score': 0.6744186046511628},
 'OPINION_OPR': {'precision': 0.8337874659400545,
  'recall': 0.6071428571428571,
  'f_score': 0.7026406429391503},
 'OPINION_SEG': {'precision': 0.8404163052905465,
  'recall': 0.6852899575671852,
  'f_score': 0.7549668874172186},
 'counter': {'OPINION_OPR_FP': 122,
  'OPINION_OPR_FN': 396,
  'OPINION_OPR_TP': 612,
  'OPINION_SRC_FP': 184,
  'OPINION_SRC_FN': 404,
  'OPINION_SRC_TP': 609,
  'OPINION_SEG_FP': 184,
  'OPINION_SEG_FN': 445,
  'OPINION_SEG_TP': 969}}

In [358]:
new_eval(all_docs, tolerance_check)

{'OPINION_SRC': {'precision': 0.7604035308953342,
  'recall': 0.6072507552870091,
  'f_score': 0.6752519596864501},
 'OPINION_OPR': {'precision': 0.8337874659400545,
  'recall': 0.6071428571428571,
  'f_score': 0.7026406429391503},
 'OPINION_SEG': {'precision': 0.17432784041630528,
  'recall': 0.18980169971671387,
  'f_score': 0.1817359855334539},
 'counter': {'OPINION_OPR_FP': 122,
  'OPINION_OPR_FN': 396,
  'OPINION_OPR_TP': 612,
  'OPINION_SRC_FP': 190,
  'OPINION_SRC_FN': 390,
  'OPINION_SRC_TP': 603,
  'OPINION_SEG_FP': 952,
  'OPINION_SEG_FN': 858,
  'OPINION_SEG_TP': 201}}

In [None]:
exit()

In [325]:
new_eval(all_docs, delete_around_punt_check)

{'OPINION_SRC': {'precision': 0.6620428751576293,
  'recall': 0.5297679112008072,
  'f_score': 0.5885650224215248},
 'OPINION_OPR': {'precision': 0.8201634877384196,
  'recall': 0.5972222222222222,
  'f_score': 0.6911595866819747},
 'OPINION_SEG': {'precision': 0.15437987857762359,
  'recall': 0.1680830972615675,
  'f_score': 0.1609403254972875},
 'counter': {'OPINION_OPR_FP': 132,
  'OPINION_OPR_FN': 406,
  'OPINION_OPR_TP': 602,
  'OPINION_SRC_FP': 268,
  'OPINION_SRC_FN': 466,
  'OPINION_SRC_TP': 525,
  'OPINION_SEG_FP': 975,
  'OPINION_SEG_FN': 881,
  'OPINION_SEG_TP': 178}}

In [326]:
new_eval(all_docs, interval_check)

{'OPINION_SRC': {'precision': 0.7679697351828499,
  'recall': 0.6011846001974334,
  'f_score': 0.6744186046511628},
 'OPINION_OPR': {'precision': 0.8337874659400545,
  'recall': 0.6071428571428571,
  'f_score': 0.7026406429391503},
 'OPINION_SEG': {'precision': 0.8404163052905465,
  'recall': 0.6852899575671852,
  'f_score': 0.7549668874172186},
 'counter': {'OPINION_OPR_FP': 122,
  'OPINION_OPR_FN': 396,
  'OPINION_OPR_TP': 612,
  'OPINION_SRC_FP': 184,
  'OPINION_SRC_FN': 404,
  'OPINION_SRC_TP': 609,
  'OPINION_SEG_FP': 184,
  'OPINION_SEG_FN': 445,
  'OPINION_SEG_TP': 969}}

In [None]:
exit()

In [134]:
# new_eval(all_docs, delete_around_punt_check)
# methods = {
#     "opinion_v0": {
#         "version": "opinion_v0",
#         "pattern": [
#             {
#                 "RIGHT_ID": "OPINION_OPR_found_root",
#                 "RIGHT_ATTRS": {
#                     "TAG": {
#                         "IN": ["VE"]
#                     }
#                 }
#             },
#             {
#                 "LEFT_ID": "OPINION_OPR_found_root",
#                 "REL_OP": ">",
#                 "RIGHT_ID": "OPINION_SRC_found_root",
#                 "RIGHT_ATTRS": {
#                     "DEP": {
#                         "IN": ["nsubj"]
#                     }
#                 }
#             },
#             {
#                 "LEFT_ID": "OPINION_OPR_found_root",
#                 "REL_OP": ">",
#                 "RIGHT_ID": "OPINION_SEG_found_root",
#                 "RIGHT_ATTRS": {
#                     "DEP": {
#                         "IN": ["ccomp", "parataxis"]
#                     },
#                 }
#             }
#         ]
#     },
# }

{'OPINION_SRC': {'precision': 0.6599749058971142,
  'recall': 0.5307769929364279,
  'f_score': 0.5883668903803132},
 'OPINION_OPR': {'precision': 0.8181818181818182,
  'recall': 0.5982142857142857,
  'f_score': 0.6911174785100287},
 'OPINION_SEG': {'precision': 0.15292096219931273,
  'recall': 0.1680830972615675,
  'f_score': 0.1601439496176338},
 'counter': {'OPINION_OPR_FP': 134,
  'OPINION_OPR_FN': 405,
  'OPINION_OPR_TP': 603,
  'OPINION_SRC_FP': 271,
  'OPINION_SRC_FN': 465,
  'OPINION_SRC_TP': 526,
  'OPINION_SEG_FP': 986,
  'OPINION_SEG_FN': 881,
  'OPINION_SEG_TP': 178}}

In [135]:
# new_eval(all_docs, interval_check)

{'OPINION_SRC': {'precision': 0.7653701380175659,
  'recall': 0.6021717670286278,
  'f_score': 0.6740331491712708},
 'OPINION_OPR': {'precision': 0.8317503392130258,
  'recall': 0.6081349206349206,
  'f_score': 0.7025787965616046},
 'OPINION_SEG': {'precision': 0.8393470790378007,
  'recall': 0.6875439831104856,
  'f_score': 0.7558994197292069},
 'counter': {'OPINION_OPR_FP': 124,
  'OPINION_OPR_FN': 395,
  'OPINION_OPR_TP': 613,
  'OPINION_SRC_FP': 187,
  'OPINION_SRC_FN': 403,
  'OPINION_SRC_TP': 610,
  'OPINION_SEG_FP': 187,
  'OPINION_SEG_FN': 444,
  'OPINION_SEG_TP': 977}}

In [None]:
exit()

In [96]:
# new_eval(all_docs, delete_around_punt_check)

{'OPINION_SRC': {'precision': 0.6839887640449438,
  'recall': 0.49142280524722504,
  'f_score': 0.5719318849089841},
 'OPINION_OPR': {'precision': 0.8335854765506808,
  'recall': 0.5466269841269841,
  'f_score': 0.6602756141402036},
 'OPINION_SEG': {'precision': 0.1759465478841871,
  'recall': 0.14919735599622286,
  'f_score': 0.16147164026571284},
 'counter': {'OPINION_OPR_FP': 110,
  'OPINION_OPR_FN': 457,
  'OPINION_OPR_TP': 551,
  'OPINION_SRC_FP': 225,
  'OPINION_SRC_FN': 504,
  'OPINION_SRC_TP': 487,
  'OPINION_SEG_FP': 740,
  'OPINION_SEG_FN': 901,
  'OPINION_SEG_TP': 158}}

In [98]:
# new_eval(all_docs, interval_check)

{'OPINION_SRC': {'precision': 0.7823033707865169,
  'recall': 0.5509396636993076,
  'f_score': 0.6465467208357516},
 'OPINION_OPR': {'precision': 0.8456883509833586,
  'recall': 0.5545634920634921,
  'f_score': 0.6698621929298982},
 'OPINION_SEG': {'precision': 0.8652561247216035,
  'recall': 0.60703125,
  'f_score': 0.7134986225895316},
 'counter': {'OPINION_OPR_FP': 102,
  'OPINION_OPR_FN': 449,
  'OPINION_OPR_TP': 559,
  'OPINION_SRC_FP': 155,
  'OPINION_SRC_FN': 454,
  'OPINION_SRC_TP': 557,
  'OPINION_SEG_FP': 121,
  'OPINION_SEG_FN': 503,
  'OPINION_SEG_TP': 777}}

# Prounce check

In [287]:
for docs in all_docs:
    for doc in docs:
        if 'opinion_label' in doc.spans:
            print(doc.spans['opinion_label'])
            for span in doc.spans['opinion_label']:
                if span.label_ == 'OPINION_SRC':
                    print(span.text, span.label_, span._.label_id, span._.label_type)
                    for token in span:
                        if len(token._.label_type) > 1:
                            print(token.text, token.pos_, token.dep_, token._.label_type, token._.label_id, type(token._.label_type))

[表示, 民進黨立委范雲, 將提出修法，讓女性有資格服兵役。女性是否納義務役引起各界討論，對女性服兵役是否影響未來民進黨的選情？, 表示, 台南市長黃偉哲, 希望多所考量，並多徵詢各方意見。]
民進黨立委范雲 OPINION_SRC ['KYl-FS1X6n'] ['OPINION_SRC']
台南市長黃偉哲 OPINION_SRC ['8xVRxD3iiH'] ['OPINION_SRC']
[表示, 他, 尊重立委的提案權，有很多國家女性是不用服兵役，因為立法院是合議制，要經過多數立委同意才有辦法修法，對於范雲的主張，他表示尊重。]
他 OPINION_SRC ['k5K_dfPFam', '29Xdoo-Vez'] ['OPINION_SRC', 'Pronoun']
他 PRON nsubj ['OPINION_SRC', 'Pronoun'] ['k5K_dfPFam', '29Xdoo-Vez'] <class 'list'>
[表示, 黃偉哲, 他, 說, 「希望多所考量，並多徵詢各方意見。」, 「這個喔？無從評估！」, 「其實在台灣，已有女性志願役，如要改成義務役，大家還是…」]
黃偉哲 OPINION_SRC ['FPpZC4jq7X'] ['OPINION_SRC']
他 OPINION_SRC ['odsYxV1YeQ', 'XWGmDJV_2H'] ['OPINION_SRC', 'Pronoun']
他 PRON nsubj ['OPINION_SRC', 'Pronoun'] ['odsYxV1YeQ', 'XWGmDJV_2H'] <class 'list'>
[蔡英文總統, 揭示, 執政團隊4項重要任務：照顧人民生活、維持經濟動能、守護國家安全、善盡區域責任，呼籲大家團結一心，讓國家更進步、繁榮、平安、幸福。]
蔡英文總統 OPINION_SRC ['b3KJMm4y9Q'] ['OPINION_SRC']
[蔡英文, 指出, 受到烏俄戰爭、全球通膨、威權擴張的持續衝擊，國際上許多主要機構都預測，2023年全球經濟正持續轉弱，各國不僅要面對經濟、金融的劇烈波動，能源、糧食危機反覆出現的高度不確定性，也讓經濟結構、成長動能備受考驗，台灣也不例外。]
蔡英文 OPINION_SRC ['nB42WeZc7G'] ['OPINION_SRC

In [None]:
for docs in all_docs:
    for doc in docs:
        if 'pronounce_in_label' in doc.spans:
            for span in doc.spans['pronounce_in_label']:
                print("span", doc.spans['pronounce_in_label']) 

In [None]:
for docs in all_docs:
    for doc in docs:
        if len(doc.ents) > 0:
            print("doc", doc.text)
            for ent in doc.ents:
                if ent.label_ == 'PERSON':
                    print("ent", ent.text, ent.label_)

1. CKIP NER len >= 2 (holder or not in span)
2. char match

In [None]:
Span.set_extension("coreference", default=None, force=True)

def found_coreference(docs_for_a_news, source_spangroup_key, source_span_label=None, ner_span_label="PERSON"):
    
    doc_len_list = [len(doc) for doc in docs_for_a_news]
    doc_combine = Doc.from_docs(docs_for_a_news, ensure_whitespace=False)
    
    if source_spangroup_key in doc_combine.spans:
       for span in doc.spans[source_spangroup_key]:
            if source_span_label != None:
                if span.label_ == source_span_label:
                    person_list = [ner for ner in doc[:span.start].ents if ner.label_ == ner_span_label]
                else:
                    person_list = [ner for ner in doc[:span.start].ents] 
                    if len(person_list) > 0:
                        person_list.reverse()
                        for person in person_list:
                            if all(["OPINION_SRC" not in token._.label for token in person]):
                                span._.coreference = person
                                docs_for_a_news

    

In [None]:
def eval_exeact_pronounce(all_docs):
    counter = {
        'Pronoun_FP': 0,
        'Pronoun_FN': 0,
        'Pronoun_TP': 0,
    }
    
    for docs in all_docs:
        for doc in docs:
            spans = {
                'Pronoun_TP': [],
                'Pronoun_FP': [],
                'Pronoun_FN': [],
            }
    
            if 'coreference_label' in doc.spans and 'pronounce_in_label' in doc.spans:
                for match_token in doc.spans['pronounce_in_label']:
                    found_match = False
                    for label_token in doc.spans['coreference_label']:
                        if (label_token.start, label_token.end) == (match_token.start, match_token.end) and label_token.label_ == 'Pronoun':
                            spans[f"Pronoun_TP"].append(label_token)
                            found_match = True
                            break
                    if not found_match:
                        spans["Pronoun_FP"].append(match_token)
                
                for label_token in doc.spans['coreference_label']:
                    if label_token not in spans["Pronoun_TP"]:
                        spans["Pronoun_FN"].append(label_token)
            
            elif 'coreference_label' in doc.spans:
                for label_token in doc.spans['coreference_label']:
                    if label_token.label_ == 'Pronounce':
                        spans["Pronoun_FN"].append(label_token)
            
            elif 'pronounce_in_label' in doc.spans:
                for match_token in doc.spans['pronounce_in_label']:
                    spans["Pronoun_FP"].append(match_token)
            
            for key in counter.keys():
                counter[key] += len(spans[key])
            
    print(counter)

    return {
        'precision': get_precision(counter['Pronoun_TP'], counter['Pronoun_FP']),
        'recall': get_recall(counter['Pronoun_TP'], counter['Pronoun_FN']),
        'f_score': get_f_score(get_precision(counter['Pronoun_TP'], counter['Pronoun_FP']), get_recall(counter['Pronoun_TP'], counter['Pronoun_FN'])),
        'counter': counter
    }