In [1]:
from presidio_evaluator.data_generator import read_synth_dataset
from presidio_evaluator import ModelEvaluator
from collections import Counter

# Load IPython's autoreload extension and enable mode 2, so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# URL of the Presidio analyze endpoint evaluated in this notebook; it is
# passed as `endpoint` to PresidioAPIEvaluator further below.
# NOTE(review): hardcoded, plain-HTTP URL -- consider making it configurable.
MY_PRESIDIO_ENDPOINT = "http://presidio-api.westeurope.cloudapp.azure.com/api/v1/projects/test/analyze"

## Evaluate your Presidio instance via the Presidio API

#### A. Read dataset for evaluation

In [2]:
# Load the synthetic evaluation dataset from its JSON file.
input_samples = read_synth_dataset("../data/synth_dataset.json")

# Report how many samples were read.
sample_count = len(input_samples)
print("Read {} samples".format(sample_count))

Read 300 samples


#### B. Descriptive statistics

In [3]:
def flatten(l):
    """Collapse an iterable of iterables into one flat list, preserving order."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat


# Tally how many annotated spans of each entity type the dataset contains.
all_spans = flatten([input_sample.spans for input_sample in input_samples])
count_per_entity = Counter(span.entity_type for span in all_spans)
count_per_entity

Counter({'PERSON': 174,
         'CREDIT_CARD': 49,
         'LOCATION': 75,
         'ORGANIZATION': 48,
         'US_SSN': 1,
         'EMAIL': 11,
         'BIRTHDAY': 4,
         'TITLE': 4,
         'URL': 8,
         'PHONE_NUMBER': 9,
         'IP_ADDRESS': 3,
         'IBAN': 3,
         'NATIONALITY': 1})

#### C. Match the dataset's entity names with Presidio's entity names

In [4]:
# Translation table from the dataset's entity labels (keys) to the entity
# names used by Presidio (values). Commented-out rows are not mapped.
entities_mapping = {
    "PERSON": "PERSON",
    "EMAIL": "EMAIL_ADDRESS",
    "CREDIT_CARD": "CREDIT_CARD",
    "FIRST_NAME": "PERSON",
    "PHONE_NUMBER": "PHONE_NUMBER",
    "LOCATION": "LOCATION",
    # "BIRTHDAY": "DATE_TIME",
    # "DATE": "DATE_TIME",
    # NOTE(review): the target "DOMAIN" does not appear in presidio_fields
    # below (which lists "DOMAIN_NAME") -- confirm this mapping is intended.
    "DOMAIN": "DOMAIN",
    # "CITY": "LOCATION",
    # "ADDRESS": "LOCATION",
    "IBAN": "IBAN_CODE",
    # "URL": "DOMAIN_NAME",
    "US_SSN": "US_SSN",
    "IP_ADDRESS": "IP_ADDRESS",
    # "ORGANIZATION": "ORG",
    "O": "O",
}

# Presidio-side entity names handed to the alignment helper.
presidio_fields = [
    "CREDIT_CARD",
    "CRYPTO",
    "DATE_TIME",
    "DOMAIN_NAME",
    "EMAIL_ADDRESS",
    "IBAN_CODE",
    "IP_ADDRESS",
    "NRP",
    "LOCATION",
    "PERSON",
    "PHONE_NUMBER",
    "US_SSN",
]

# Align the samples' entity labels with Presidio's names; the later cells
# work on this aligned list.
new_list = ModelEvaluator.align_input_samples_to_presidio_analyzer(
    input_samples, entities_mapping, presidio_fields
)

#### D. Recalculate statistics on updated dataset

In [5]:
# Re-tally the entity types, this time over the aligned samples.
aligned_spans = flatten([input_sample.spans for input_sample in new_list])
count_per_entity_new = Counter(span.entity_type for span in aligned_spans)
count_per_entity_new

Counter({'PERSON': 174,
         'CREDIT_CARD': 49,
         'LOCATION': 75,
         'US_SSN': 1,
         'EMAIL_ADDRESS': 11,
         'PHONE_NUMBER': 9,
         'IP_ADDRESS': 3,
         'IBAN_CODE': 3})

#### E. Run the presidio-evaluator framework with Presidio's API as the model under test

In [None]:
from presidio_evaluator import PresidioAPIEvaluator

# Restrict scoring to the entity types present in the aligned dataset.
entities_to_score = list(count_per_entity_new)
presidio = PresidioAPIEvaluator(
    entities_to_keep=entities_to_score,
    endpoint=MY_PRESIDIO_ENDPOINT,
)

# Evaluate only the first 100 aligned samples.
# (The variable name's spelling is kept because a later cell references it.)
evaluted_samples = presidio.evaluate_all(new_list[:100])

Evaluating <class 'presidio_evaluator.presidio_api_evaluator.PresidioAPIEvaluator'>:  19%|█▉        | 19/100 [00:24<01:45,  1.30s/it]

#### F. Extract statistics
- Precision, recall and F-measure are calculated based on a PII/Not PII binary classification per token.
- Specific entity recall and precision are calculated on the specific PII entity level.

In [7]:
# Compute the scores from the evaluated samples; the result object is printed
# and inspected for errors in the cells that follow.
evaluation_result = presidio.calculate_score(evaluted_samples)

In [8]:
# Print the per-entity precision/recall table and the overall PII F measure.
evaluation_result.print()

                        Entity                     Precision                        Recall
                        PERSON                        76.67%                        75.82%
                   CREDIT_CARD                       100.00%                       100.00%
                      LOCATION                         8.87%                        57.89%
                        US_SSN                          nan%                          nan%
                 EMAIL_ADDRESS                       100.00%                       100.00%
                  PHONE_NUMBER                         0.00%                          nan%
                    IP_ADDRESS                          nan%                          nan%
                     IBAN_CODE                          nan%                          nan%
                           PII                        51.71%                        94.53%
PII F measure: 0.6685082872928177


#### G. Analyze wrong predictions

In [9]:
# Errors recorded on the evaluation result; they feed the false-positive and
# false-negative analyses below.
errors = evaluation_result.model_errors

In [10]:
# Show the five most common false-positive tokens with an example sentence each.
ModelEvaluator.most_common_fp_tokens(errors, n=5)

Most common false positive tokens:
[('Texas', 2), ('Eng', 1), ('Dun', 1), ('Rite', 1), ('Lawn', 1)]
Example sentence with each FP token:
Celebrating its 10th year in Villa de Ves, Trak Auto is a 501(c)3 that invites songwriters from around the world to Texas to share the universal language of music in collaborations designed to bridge cultures, build friendships and cultivate peace.
Hello, this is Eng. Bojka Žilih. Who are you?
Dun Rite Lawn Care is the brainchild of our 3 founders: Charlotte Park, Oline Mikaelsen and Brodie Walker.  The idea was born (on the beach) while they were constructing a website to be the basis of another start-up idea.
Dun Rite Lawn Care is the brainchild of our 3 founders: Charlotte Park, Oline Mikaelsen and Brodie Walker.  The idea was born (on the beach) while they were constructing a website to be the basis of another start-up idea.
Dun Rite Lawn Care is the brainchild of our 3 founders: Charlotte Park, Oline Mikaelsen and Brodie Walker.  The idea was bor

In [11]:
# False positives for the PERSON entity, narrowed to the columns of interest.
fps_df = ModelEvaluator.get_fps_dataframe(errors, entity="PERSON")
fps_df[["full_text", "token", "prediction"]]

Unnamed: 0,full_text,token,prediction
0,"Hello, this is Eng. Bojka Žilih. Who are you?",Eng,PERSON
1,Dun Rite Lawn Care is the brainchild of our 3 ...,Dun,PERSON
2,Dun Rite Lawn Care is the brainchild of our 3 ...,Rite,PERSON
3,Dun Rite Lawn Care is the brainchild of our 3 ...,Lawn,PERSON
4,Dun Rite Lawn Care is the brainchild of our 3 ...,Care,PERSON


In [12]:
# False negatives for the PERSON entity, shown with all available columns.
fns_df = ModelEvaluator.get_fns_dataframe(errors, entity="PERSON")
fns_df

Unnamed: 0,error_type,annotation,prediction,token,full_text,Gender,NameSet,Country,Lowercase,Template#
0,FN,PERSON,O,Kotoya,Kotoya Negishi listed his top 20 songs for Ent...,male,American,France,False,103
1,FN,PERSON,O,Negishi,Kotoya Negishi listed his top 20 songs for Ent...,male,American,France,False,103
2,FN,PERSON,O,lincoln,from the film american graffiti (also features...,male,Australian,Monaco,True,104
3,FN,PERSON,O,wormald,from the film american graffiti (also features...,male,Australian,Monaco,True,104
4,FN,PERSON,O,Avtorhan,"Avtorhan, can I please speak to your boss?",male,Chechen (Latin),Portugal,False,37
5,FN,PERSON,O,george,"george, can i please speak to your boss?",male,Australian,Djibouti,True,37
6,Wrong entity,PERSON,LOCATION,Nusa,have you heard Nusa Márkus speak yet?,female,Hungarian,Guinea,False,91
7,Wrong entity,PERSON,LOCATION,Márkus,have you heard Nusa Márkus speak yet?,female,Hungarian,Guinea,False,91
8,FN,PERSON,O,sofie,sometimes people call me sofie,female,Greenland,Australia,True,74
9,FN,PERSON,O,Herceg,What's your last name? Herceg,female,Croatian,Dominican Republic,False,67
