# Model Evaluation



In [1]:
!pip install -U -q PyDrive

# Import libraries for accessing Google Drive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user, then build a PyDrive client that uses the
# application-default Google credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Google Drive file ID of the text that the NER models are run on.
text_link = "1Y6R86spq5l1-IfSa5GvbKhFksRyU4EK6"

# Download the file into the working directory and read it as one string.
# `text` is a notebook-wide global: get_line_and_word_indices() reads it and
# every tagger below is run on it.
downloaded = drive.CreateFile({'id':text_link})
downloaded.GetContentFile('text_link.txt')
with open('text_link.txt') as f:
    text = f.read()

# Google Drive file ID of the reference annotations that FScore() compares
# each model's output against.
adjucated_annotations_link = "1DRw7Dv5rm6GFb1fkBbl2bv-rwfGgY7o0"

# Same download steps; the file is kept as a list of lines (readlines keeps
# the trailing newline on each entry).
downloaded = drive.CreateFile({'id':adjucated_annotations_link})
downloaded.GetContentFile('adjucated_annotations.txt')
with open('adjucated_annotations.txt') as f:
    adjucated_annotations = f.readlines()

In [2]:
!pip install flair # https://github.com/flairNLP/flair 
!pip install spacy # https://github.com/explosion/spaCy , https://spacy.io/
!pip install stanza # https://github.com/stanfordnlp/stanza , https://stanfordnlp.github.io/stanza/

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import flair
from flair.data import Sentence
from flair.nn import Classifier
import spacy
import stanza

# Helper functions

In [4]:
def FScore(annotations1, annotations2):
    """Return the F1 score between two collections of annotation lines.

    Every line is stripped and split on ' - ' into a tuple, and the two
    inputs are compared as sets of those tuples, so duplicate lines and
    line order do not affect the result. Blank lines are ignored.

    Args:
        annotations1: iterable of strings such as "(3, 4) - PER"; treated as
            the reference set when computing recall.
        annotations2: iterable of strings in the same format; treated as the
            candidate set when computing precision.

    Returns:
        The harmonic mean of precision and recall as a float. Swapping the
        two arguments swaps precision and recall, which leaves the F1 value
        unchanged. Returns 0.0 when the two sets share no annotation
        (including when either is empty) instead of dividing by zero.
    """
    def _as_tuples(lines):
        # Strip first so a trailing newline from readlines() does not make
        # otherwise identical annotations compare unequal.
        stripped = (line.strip() for line in lines)
        return {tuple(entry.split(' - ')) for entry in stripped if entry}

    reference = _as_tuples(annotations1)
    candidate = _as_tuples(annotations2)

    true_positives = len(reference & candidate)
    if true_positives == 0:
        # Precision, recall, or their sum would be 0 here, so the F1
        # formula below would raise ZeroDivisionError.
        return 0.0

    # TP + FP is the size of the candidate set; TP + FN the reference set.
    precision = true_positives / len(candidate)
    recall = true_positives / len(reference)
    return 2 * (precision * recall) / (precision + recall)

In [5]:
def get_line_and_word_indices(pos, source_text=None):
    """Convert a character offset into a (line index, word index) pair.

    Lines are the pieces of the text separated by "\n"; words are the
    whitespace-separated tokens of a line. Both indices are zero-based.

    A position that falls on whitespace (or on the line's newline) is
    attributed to the word before it. Callers pass entity end offsets to
    this function; presumably those are exclusive (one past the entity's
    last character), in which case this rule maps them to the entity's
    last word. A position before the first word of a line, or on a line
    with no words at all, yields word index 0.

    Args:
        pos: character offset into the text.
        source_text: text to index into. Defaults to the notebook-level
            global `text` when omitted.

    Returns:
        Tuple ``(line_index, word_index)``.

    Raises:
        ValueError: if ``pos`` lies beyond the end of the text.
    """
    import re  # local import keeps this cell self-contained

    if source_text is None:
        source_text = text

    line_start = 0  # offset of the current line's first character
    for line_no, line in enumerate(source_text.split("\n")):
        line_end = line_start + len(line)  # offset of this line's newline
        if pos <= line_end:
            column = pos - line_start
            word_no = 0
            # Use the real start offset of every word so that leading or
            # repeated whitespace cannot shift the result.
            for k, match in enumerate(re.finditer(r"\S+", line)):
                if match.start() > column:
                    break
                word_no = k
            return line_no, word_no
        line_start = line_end + 1

    raise ValueError(
        "position %r is beyond the end of the text (length %d)"
        % (pos, len(source_text))
    )


# Flair

In [6]:
from flair.nn import Classifier
from flair.data import Sentence

# Load Flair's 'ner-ontonotes' tagger; the same `tagger` object is reused
# on the full text in the next cell.
tagger = Classifier.load('ner-ontonotes')

# Short example sentence used as a smoke test of the loaded model.
sentence = Sentence('George Washington went to Washington.')

# Predict NER tags. The return value is not used; predicted spans are read
# back from the Sentence object (see sentence.get_spans('ner') below).
tagger.predict(sentence)

2023-05-11 07:56:10,980 SequenceTagger predicts: Dictionary with 75 tags: O, S-PERSON, B-PERSON, E-PERSON, I-PERSON, S-GPE, B-GPE, E-GPE, I-GPE, S-ORG, B-ORG, E-ORG, I-ORG, S-DATE, B-DATE, E-DATE, I-DATE, S-CARDINAL, B-CARDINAL, E-CARDINAL, I-CARDINAL, S-NORP, B-NORP, E-NORP, I-NORP, S-MONEY, B-MONEY, E-MONEY, I-MONEY, S-PERCENT, B-PERCENT, E-PERCENT, I-PERCENT, S-ORDINAL, B-ORDINAL, E-ORDINAL, I-ORDINAL, S-LOC, B-LOC, E-LOC, I-LOC, S-TIME, B-TIME, E-TIME, I-TIME, S-WORK_OF_ART, B-WORK_OF_ART, E-WORK_OF_ART, I-WORK_OF_ART, S-FAC


In [7]:
# Tag the whole document as a single Flair Sentence (this replaces the
# example `sentence` from the previous cell). The span start/end positions
# are later passed to get_line_and_word_indices(), which interprets them as
# character offsets into `text`.
sentence = Sentence(text)
tagger.predict(sentence)

In [8]:
# Spot check of the offset conversion: pick one predicted span and print its
# tokens next to the (line, word) pairs computed for its start and end.
# NOTE(review): index 100 looks like an arbitrary pick and would fail if the
# tagger returned fewer than 101 spans -- confirm it is only a debugging aid.
entity = sentence.get_spans('ner')[100]
print(entity.tokens)
print(get_line_and_word_indices(entity.start_position), get_line_and_word_indices(entity.end_position))

[Token[2808]: "about", Token[2809]: "eleven", Token[2810]: "o'clock"]
(295, 7) (295, 9)


In [9]:
def extract_tag(tag):
    """Translate a tagger label into the label written to the annotation lines.

    "PERSON" becomes "PER"; "GPE", "LOC", "FAC" and "ORG" are returned
    unchanged. Any other value yields None, which callers use as the
    signal to skip the entity.
    """
    label_pairs = (
        ("PERSON", "PER"),
        ("GPE", "GPE"),
        ("LOC", "LOC"),
        ("FAC", "FAC"),
        ("ORG", "ORG"),
    )
    for source_label, target_label in label_pairs:
        if tag == source_label:
            return target_label
    return None

In [10]:
# Turn every Flair span of a kept entity type into an "<index> - <TAG>" line.
annotation_lines = []
for span in sentence.get_spans('ner'):
    tag = extract_tag(str(span.tag))
    if tag is not None:
        start = get_line_and_word_indices(span.start_position)
        end = get_line_and_word_indices(span.end_position)
        # A span that starts and ends on the same word is written as one
        # (line, word) pair, otherwise as a (start, end) pair of pairs.
        index = start if start == end else (start, end)
        annotation_lines.append(" - ".join((str(index), tag)))

In [11]:
# F-score of the Flair annotation lines against the reference annotations.
flair_fscore = FScore(annotation_lines, adjucated_annotations)

In [22]:
# Save the Flair annotation lines to flair.txt, one annotation per line.
with open("flair.txt", "w") as out_file:
    out_file.writelines(line + "\n" for line in annotation_lines)

# Stanza


In [13]:
# Run Stanza tokenization + NER (OntoNotes package) over the text.
pipe = stanza.Pipeline("en", processors="tokenize,ner", package={"ner": ["ontonotes"]})
doc = pipe(text)

# Work on plain dicts and keep only the entity types of interest.
ents = [each.to_dict() for each in doc.ents]
filtered = [
    ent for ent in ents
    if ent["type"] in ("PERSON", "GPE", "LOC", "ORG", "FAC", "VEH")
]

stanza_annotations = []
for entity in filtered:
    start = get_line_and_word_indices(entity["start_char"])
    end = get_line_and_word_indices(entity["end_char"])
    # Single-word entities are written as one (line, word) pair, longer
    # ones as a (start, end) pair of pairs.
    index = start if start == end else (start, end)
    # Rename PERSON to PER (the dict is updated in place); every other
    # label is written as-is.
    if entity["type"] == "PERSON":
        entity["type"] = "PER"
    stanza_annotations.append(" - ".join((str(index), entity["type"])))

# F-score of the Stanza annotation lines against the reference annotations.
stanza_fscore = FScore(stanza_annotations, adjucated_annotations)


INFO:stanza:Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.5.0.json:   0%|   …

INFO:stanza:Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | combined  |
| ner       | ontonotes |

INFO:stanza:Using device: cuda
INFO:stanza:Loading: tokenize
INFO:stanza:Loading: ner
INFO:stanza:Done loading processors!


In [14]:
# Save the Stanza annotation lines to stanza.txt, one annotation per line.
with open("stanza.txt", "w") as out_file:
    out_file.writelines(line + "\n" for line in stanza_annotations)

# spaCy

In [23]:
import spacy

# Load English tokenizer, tagger, parser and NER
# NOTE(review): no cell in this notebook downloads "en_core_web_sm";
# presumably it is already installed in the runtime -- confirm.
nlp = spacy.load("en_core_web_sm")

# Process whole documents
# The sample below is left disabled: enabling it would overwrite the global
# `text` that was downloaded from Drive.
# text = ("When Sebastian Thrun started working on self-driving cars at "
#         "Google in 2007, few people outside of the company took him "
#         "seriously. “I can tell you very senior CEOs of major American "
#         "car companies would shake my hand and turn away because I wasn’t "
#         "worth talking to,” said Thrun, in an interview with Recode earlier "
#         "this week.")
# Each newline is replaced by a single space, so the string keeps its length
# and the entity character offsets still index into the original `text`.
# This rebinds `doc`, which previously held the Stanza result.
doc = nlp(text.replace('\n',' '))

In [24]:
def new_label(label):
    """Return 'PER' for the label 'PERSON'; return any other label unchanged."""
    return 'PER' if label == 'PERSON' else label

In [25]:
def annotation(a, b):
    """Format a start/end index pair as the index part of an annotation line.

    When both indices are equal only one of them is written; otherwise the
    result is "(<a>, <b>)", built from the string forms of both.
    """
    if a == b:
        return str(a)
    return '({}, {})'.format(str(a), str(b))

In [26]:
# Build one "<index> - <LABEL>" line per spaCy entity of a kept type.
index_list = [
    annotation(
        get_line_and_word_indices(entity.start_char),
        get_line_and_word_indices(entity.end_char),
    ) + ' - ' + new_label(entity.label_)
    for entity in doc.ents
    if entity.label_ in ('PERSON', 'ORG', 'GPE', 'LOC', 'FAC')
]

In [27]:
# F-score of the spaCy annotation lines against the reference annotations.
spacy_score = FScore(index_list, adjucated_annotations)

In [28]:
# Save the spaCy annotation lines to spacy.txt, one annotation per line.
with open("spacy.txt", "w") as out_file:
    out_file.writelines(line + "\n" for line in index_list)

# Compare

In [21]:
# Collect the three F-scores, each labelled with the library that produced
# it. The SpaCy and Stanza labels were previously attached to each other's
# scores; each label now matches the variable computed in that library's
# section.
scores = [
    "Flair - " + str(flair_fscore),
    "SpaCy - " + str(spacy_score),
    "Stanza - " + str(stanza_fscore),
]

# Write one "<library> - <score>" line per model to 3-scores.txt.
with open("3-scores.txt", "w") as f:
    for string in scores:
        f.write(string + "\n")