Generate the `spacy_new_ontonotes28` model by running `models/spacy_retrain.py`. *See the script for details on how the model is trained.*

In [None]:
# Run the retraining script (spacy_retrain.py) from the models directory.
! cd ../../models && python spacy_retrain.py

Evaluate spaCy models on person names, organizations, and locations using the Presidio Evaluator framework.

Data = `test_February_28_2020`

In [1]:
import spacy

from presidio_evaluator import ModelEvaluator
from presidio_evaluator.data_generator import read_synth_dataset
%reload_ext autoreload
%autoreload 2



Select data for evaluation:

In [2]:
# Read the synthetic samples from disk and report how many were loaded.
# Both names refer to the same list; DATASET is the one used by later cells.
DATASET = synth_samples = read_synth_dataset("../../data/synth_dataset.json")
print(len(DATASET))

300


In [3]:
from collections import Counter

# Count how many annotated spans of each entity type appear across all samples.
entity_counter = Counter(
    span.entity_type
    for sample in DATASET
    for span in sample.spans
)

In [4]:
# Show the number of spans per entity type
entity_counter

Counter({'PERSON': 174,
         'CREDIT_CARD': 49,
         'LOCATION': 75,
         'ORGANIZATION': 48,
         'US_SSN': 1,
         'EMAIL': 11,
         'BIRTHDAY': 4,
         'TITLE': 4,
         'URL': 8,
         'PHONE_NUMBER': 9,
         'IP_ADDRESS': 3,
         'IBAN': 3,
         'NATIONALITY': 1})

In [5]:
# Inspect a single sample (second item in the dataset) as a sanity check
DATASET[1]

Full text: Kotoya Negishi listed his top 20 songs for Entertainment Weekly and had the balls to list this song at #15. (What did he put at #1 you ask? Answer:"Tube Snake Boogie" by Frank Strauser â€“ go figure)
Spans: [Type: PERSON, value: Kotoya Negishi, start: 0, end: 14, Type: PERSON, value: Frank Strauser, start: 170, end: 184]
Tokens: [Kotoya, Negishi, listed, his, top, 20, songs, for, Entertainment, Weekly, and, had, the, balls, to, list, this, song, at, #, 15, ., (, What, did, he, put, at, #, 1, you, ask, ?, Answer:"Tube, Snake, Boogie, ", by, Frank, Strauser, â€, “, go, figure, )]
Tags: ['B-PERSON', 'L-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'L-PERSON', 'O', 'O', 'O', 'O', 'O']

In [6]:
# Token count of the longest sample in the dataset
max(len(sample.tokens) for sample in DATASET)

79

Select models for evaluation:

In [8]:
# Models to evaluate. Each entry is handed to `spacy.load` by the evaluation
# loop, so it is either a model name or a path to a saved model directory.
en_core_web_lg = r"en_core_web_lg"
spacy_new_ontonotes28 = r"../../model-outputs/spacy_new_ontonotes28"

# To evaluate further models (e.g. a fine-tuned `spacy_ft_100`), define their
# paths here relative to the repository rather than as absolute local paths.

models = [en_core_web_lg, spacy_new_ontonotes28]

Run evaluation on all models:

In [10]:
from presidio_evaluator.spacy_evaluator import SpacyEvaluator

# Entity labels handed to the evaluator as `entities_to_keep`.
ENTITIES_TO_KEEP = ('PERSON', 'GPE', 'ORG')

# Scores of every evaluated model, keyed by the model name/path, so the results
# of earlier models are still available after the loop moves on.
scores_by_model = {}

for model in models:
    print("-----------------------------------")
    print("Evaluating model {}".format(model))
    nlp = spacy.load(model)
    # Pass a fresh list per evaluator, as the original per-iteration literal did.
    spacy_evaluator = SpacyEvaluator(model=nlp, entities_to_keep=list(ENTITIES_TO_KEEP))
    evaluation_results = spacy_evaluator.evaluate_all(DATASET)
    scores = spacy_evaluator.calculate_score(evaluation_results)
    scores_by_model[model] = scores

    print("Confusion matrix:")
    print(scores.results)

    print("Precision and recall")
    scores.print()

    # NOTE: `nlp`, `scores` and `errors` are rebound on every iteration, so the
    # analysis cells further down operate on the LAST model in `models`.
    # Use `scores_by_model[<model>].model_errors` to analyse a different one.
    errors = scores.model_errors

-----------------------------------
Evaluating model en_core_web_lg


Evaluating <class 'presidio_evaluator.spacy_evaluator.SpacyEvaluator'>:   2%|▏         | 6/300 [00:00<00:04, 59.12it/s]

Translating entites using this dictionary: {'ORGANIZATION': 'ORG', 'COUNTRY': 'GPE', 'CITY': 'GPE', 'LOCATION': 'GPE', 'PERSON': 'PERSON', 'FIRST_NAME': 'PERSON', 'LAST_NAME': 'PERSON', 'NATION_MAN': 'GPE', 'NATION_WOMAN': 'GPE', 'NATION_PLURAL': 'GPE', 'NATIONALITY': 'GPE', 'GPE': 'GPE', 'ORG': 'ORG'}


Evaluating <class 'presidio_evaluator.spacy_evaluator.SpacyEvaluator'>: 100%|██████████| 300/300 [00:04<00:00, 70.16it/s]


Confusion matrix:
Counter({('O', 'O'): 4793, ('PERSON', 'PERSON'): 240, ('GPE', 'O'): 202, ('ORG', 'ORG'): 52, ('ORG', 'O'): 45, ('GPE', 'GPE'): 37, ('GPE', 'PERSON'): 33, ('GPE', 'ORG'): 32, ('O', 'ORG'): 28, ('PERSON', 'ORG'): 22, ('ORG', 'PERSON'): 20, ('PERSON', 'O'): 13, ('PERSON', 'GPE'): 6, ('O', 'GPE'): 5, ('O', 'PERSON'): 4, ('ORG', 'GPE'): 2})
Precision and recall
                        Entity                     Precision                        Recall
                        PERSON                        85.41%                        80.81%
                           GPE                        12.17%                        74.00%
                           ORG                        43.70%                        38.81%
                           PII                        63.07%                        92.31%
PII F measure: 0.7493670886075952
-----------------------------------
Evaluating model ../../model-outputs/spacy_new_ontonotes28
Translating entites using this dictiona

Evaluating <class 'presidio_evaluator.spacy_evaluator.SpacyEvaluator'>: 100%|██████████| 300/300 [00:04<00:00, 60.64it/s]


Confusion matrix:
Counter({('O', 'O'): 4740, ('PERSON', 'PERSON'): 250, ('GPE', 'GPE'): 198, ('GPE', 'O'): 94, ('O', 'GPE'): 79, ('ORG', 'O'): 76, ('PERSON', 'O'): 25, ('ORG', 'PERSON'): 20, ('ORG', 'ORG'): 17, ('GPE', 'PERSON'): 12, ('O', 'PERSON'): 11, ('ORG', 'GPE'): 6, ('PERSON', 'GPE'): 6})
Precision and recall
                        Entity                     Precision                        Recall
                        PERSON                        88.97%                        85.32%
                           GPE                        65.13%                        68.51%
                           ORG                        14.29%                       100.00%
                           PII                        72.30%                        84.97%
PII F measure: 0.7812739831158864


Custom evaluation

In [11]:
# Evaluate custom sentences.
# Optional: load a different model for the custom-sentence check. The line below
# is disabled because `spacy_ft_100` must first be defined as a model path.
#nlp = spacy.load(spacy_ft_100)


### Results analysis

In [12]:
# Run the current `nlp` pipeline on one sentence and list the entities it finds.
# `nlp` is the model most recently loaded (by the evaluation loop unless reassigned).
# sent = input("Enter sentence: ")
sent = 'David is talking loudly'
doc = nlp(sent)
for ent in doc.ents:
    label, value = ent.label_, ent.text
    print("Entity = {} value = {}".format(label, value))

Entity = PERSON value = David


#### False positives

1. Most false positive tokens:

In [13]:
# Most frequent false-positive tokens, followed by an example sentence for each
ModelEvaluator.most_common_fp_tokens(errors)

Most common false positive tokens:
[(',', 6), ('to', 6), ('the', 4), ('Texas', 4), ('Pleasures', 4), ('is', 3), ('and', 3), ('.', 3), ('a', 2), ('501(c)3', 2)]
Example sentence with each FP token:
Celebrating its 10th year in Villa de Ves, Trak Auto is a 501(c)3 that invites songwriters from around the world to Texas to share the universal language of music in collaborations designed to bridge cultures, build friendships and cultivate peace.
Celebrating its 10th year in Villa de Ves, Trak Auto is a 501(c)3 that invites songwriters from around the world to Texas to share the universal language of music in collaborations designed to bridge cultures, build friendships and cultivate peace.
Celebrating its 10th year in Villa de Ves, Trak Auto is a 501(c)3 that invites songwriters from around the world to Texas to share the universal language of music in collaborations designed to bridge cultures, build friendships and cultivate peace.
Celebrating its 10th year in Villa de Ves, Trak Auto is 

In [14]:
# Collect the false positives for the PERSON entity into a dataframe and show
# only the columns needed to read each error in context.
fps_df = ModelEvaluator.get_fps_dataframe(errors, entity=['PERSON'])
fps_df.loc[:, ['full_text', 'token', 'prediction']]

Unnamed: 0,full_text,token,prediction
0,Zoolander is a 2001 American action-comedy fil...,Zoolander,PERSON
1,"For my take on Mr. Bergström, see Guilty Pleas...",Pleasures,PERSON
2,Please transfer all funds from my account to t...,NatalinaLucchese@superrito.com,PERSON
3,"For my take on Mr. Wilson, see Guilty Pleasure...",Pleasures,PERSON
4,"When they weren't singing about Hobbits, satan...",Hobbits,PERSON
5,"When they weren't singing about Hobbits, satan...",Hobbits,PERSON
6,zoolander is a 2001 american action-comedy fil...,zoolander,PERSON
7,Please transfer all funds from my account to t...,HarrisonBarnes@fleckens.hu,PERSON
8,"For my take on Mr. Bermúdez, see Guilty Pleasu...",Pleasures,PERSON
9,inject select * from users where clinet_ip = ?...,//!%20\||%20/,PERSON


2. False negative examples

In [15]:
# Re-bind `errors` from the current `scores` so this cell does not rely on the
# value left behind by the evaluation loop, then list the PERSON tokens the
# model missed most often (up to 50).
errors = scores.model_errors
ModelEvaluator.most_common_fn_tokens(
    errors,
    entity=['PERSON'],
    n=50,
)

[('tryggvadóttir', 2), ('masaev', 2), ('jo', 2), ('van', 2), ('lincoln', 1), ('wormald', 1), ('george', 1), ('Victoria', 1), ('Charlotte', 1), ('Park', 1), ('Oline', 1), ('Mikaelsen', 1), ('Brodie', 1), ('Walker', 1), ('margrét', 1), ('searlait', 1), ('Mie', 1), ('Innocent', 1), ('mattsson', 1), ('temirbek', 1), ('yokoi', 1), ('wacława', 1), ('sobczak', 1), ('gameren', 1), ('den', 1), ('-', 1), ('Poole', 1)]
Token: tryggvadóttir, Annotation: PERSON, Full text: tryggvadóttir spent a year at rogers peet as the assistant to margrét tryggvadóttir, and the following year at big wheel in begonte, which later became movie gallery in 1965.
Token: masaev, Annotation: PERSON, Full text: zoolander is a 2001 american action-comedy film directed by temirbek masaev and starring masaev
Token: jo, Annotation: PERSON, Full text: my name is jo van gameren but everyone calls me jo
Token: van, Annotation: PERSON, Full text: my name is jo van gameren but everyone calls me jo
Token: lincoln, Annotation: PER

More FN analysis

In [16]:
# Collect the false negatives for the GPE entity into a dataframe
fns_df = ModelEvaluator.get_fns_dataframe(errors,entity=['GPE'])

In [17]:
# Show each missed token next to its annotation and what the model predicted instead
fns_df[['full_text','token','annotation','prediction']]

Unnamed: 0,full_text,token,annotation,prediction
0,The address of Coon Chicken Inn is ul. Zuchów ...,Zuchów,GPE,PERSON
1,The address of Coon Chicken Inn is ul. Zuchów ...,65,GPE,O
2,The address of Coon Chicken Inn is ul. Zuchów ...,",",GPE,O
3,The address of Coon Chicken Inn is ul. Zuchów ...,Dąbrowa,GPE,PERSON
4,The address of Coon Chicken Inn is ul. Zuchów ...,Górnicza,GPE,PERSON
...,...,...,...,...
101,How do I change the address linked to my credi...,Letališka,GPE,PERSON
102,How do I change the address linked to my credi...,75,GPE,O
103,How do I change the address linked to my credi...,",",GPE,O
104,How do I change the address linked to my credi...,Kobilje,GPE,O


In [18]:
# Print every model error, each followed by a blank line. A plain loop is used
# instead of a list comprehension: the comprehension built a list of `None`
# values that the notebook then displayed as the cell's result.
for error in errors:
    print(error, "\n")

type: FP, Annotation = O, prediction = GPE, Token = ,, Full text = Celebrating its 10th year in Villa de Ves, Trak Auto is a 501(c)3 that invites songwriters from around the world to Texas to share the universal language of music in collaborations designed to bridge cultures, build friendships and cultivate peace., Metadata = {'Gender': 'male', 'NameSet': 'Hungarian', 'Country': 'Italy', 'Lowercase': False, 'Template#': 116} 

type: Wrong entity, Annotation = ORG, prediction = GPE, Token = Trak, Full text = Celebrating its 10th year in Villa de Ves, Trak Auto is a 501(c)3 that invites songwriters from around the world to Texas to share the universal language of music in collaborations designed to bridge cultures, build friendships and cultivate peace., Metadata = {'Gender': 'male', 'NameSet': 'Hungarian', 'Country': 'Italy', 'Lowercase': False, 'Template#': 116} 

type: Wrong entity, Annotation = ORG, prediction = GPE, Token = Auto, Full text = Celebrating its 10th year in Villa de Ves

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,