In [None]:
import os
from collections import Counter

from presidio_evaluator import ModelEvaluator
from presidio_evaluator import PresidioAPIEvaluator
from presidio_evaluator.data_generator import read_synth_dataset

# Enable IPython's autoreload extension so edits to imported modules are picked up live.
%load_ext autoreload
%autoreload 2

# Analyze endpoint of the Presidio API deployment that this notebook evaluates.
# Set the PRESIDIO_ENDPOINT environment variable to point at your own instance;
# when it is unset, the previously hard-coded URL is used unchanged.
# NOTE(review): the default URL is plain HTTP - confirm that is acceptable
# before sending anything other than synthetic data to it.
MY_PRESIDIO_ENDPOINT = os.environ.get(
    "PRESIDIO_ENDPOINT",
    "http://presidio-api.westeurope.cloudapp.azure.com/api/v1/projects/test/analyze",
)

## Evaluate your Presidio instance via the Presidio API

#### A. Read dataset for evaluation

In [None]:
# Load the synthetic dataset to evaluate against (the path is relative to the notebook's working directory).
input_samples = read_synth_dataset("../data/synth_dataset.txt")
# Report how many samples were read as a quick sanity check.
print("Read {} samples".format(len(input_samples)))

#### B. Descriptive statistics

In [None]:
def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one flat list.

    Only one level of nesting is removed and the original item order is kept.

    :param l: an iterable of iterables (e.g. a list of lists).
    :return: a new list containing every inner item.
    """
    return [item for sublist in l for item in sublist]

# Count how many annotated spans of each entity type the dataset contains.
count_per_entity = Counter(
    span.entity_type
    for span in flatten([sample.spans for sample in input_samples])
)
count_per_entity

#### C. Match the dataset's entity names with Presidio's entity names

In [None]:
# Mapping between dataset entities and Presidio entities.
# Key: dataset entity, value: Presidio entity.
# Each target name should be one of the entries of `presidio_fields` below
# ('O' is the exception - presumably the "no entity" tag; confirm).
entities_mapping = {
    'PERSON': 'PERSON',
    'EMAIL': 'EMAIL_ADDRESS',
    'CREDIT_CARD': 'CREDIT_CARD',
    'FIRST_NAME': 'PERSON',
    'PHONE_NUMBER': 'PHONE_NUMBER',
    'LOCATION': 'LOCATION',
    # 'BIRTHDAY': 'DATE_TIME',
    # 'DATE': 'DATE_TIME',
    # Previously mapped to 'DOMAIN', which is not listed in `presidio_fields`;
    # the listed name is 'DOMAIN_NAME' (also the target of the disabled URL mapping).
    'DOMAIN': 'DOMAIN_NAME',
    # 'CITY': 'LOCATION',
    # 'ADDRESS': 'LOCATION',
    'IBAN': 'IBAN_CODE',
    # 'URL': 'DOMAIN_NAME',
    'US_SSN': 'US_SSN',
    'IP_ADDRESS': 'IP_ADDRESS',
    # 'ORGANIZATION': 'ORG',
    'O': 'O',
}

# Presidio entity names passed to the alignment step together with the mapping.
presidio_fields = [
    'CREDIT_CARD', 'CRYPTO', 'DATE_TIME', 'DOMAIN_NAME', 'EMAIL_ADDRESS', 'IBAN_CODE',
    'IP_ADDRESS', 'NRP', 'LOCATION', 'PERSON', 'PHONE_NUMBER', 'US_SSN',
]

# Align the dataset's entity names with Presidio's, using the mapping and field list defined above.
new_list = ModelEvaluator.align_input_samples_to_presidio_analyzer(
    input_samples,
    entities_mapping,
    presidio_fields,
)

#### D. Recalculate statistics on updated dataset

In [None]:
# Recompute the per-entity span counts, this time on the aligned samples.
count_per_entity_new = Counter(
    span.entity_type
    for span in flatten([sample.spans for sample in new_list])
)
count_per_entity_new

#### E. Run the presidio-evaluator framework with Presidio's API as the 'model' under test

In [None]:
# Only the first MAX_SAMPLES_TO_EVALUATE aligned samples are evaluated.
# Presumably this keeps the run against the remote endpoint short; raise it for a fuller evaluation.
MAX_SAMPLES_TO_EVALUATE = 100

# Restrict the evaluator to the entity types that occur in the aligned dataset.
presidio = PresidioAPIEvaluator(
    entities_to_keep=list(count_per_entity_new.keys()),
    endpoint=MY_PRESIDIO_ENDPOINT,
)
# The variable name (sic) is kept as-is because the scoring cell below reads it.
evaluted_samples = presidio.evaluate_all(new_list[:MAX_SAMPLES_TO_EVALUATE])

#### F. Extract statistics
- Precision, recall, and F-measure are calculated based on a binary PII / not-PII classification per token.
- Entity-specific recall and precision are calculated separately for each PII entity type.

In [None]:
# Turn the evaluated samples into the scores described above (precision, recall and F measure).
evaluation_result = presidio.calculate_score(evaluted_samples)

In [None]:
# Print the computed scores (the report format is defined by the evaluator library).
evaluation_result.print()

#### G. Analyze wrong predictions

In [None]:
# Keep the model errors attached to the evaluation result for the inspection cells that follow.
errors = evaluation_result.model_errors

In [None]:
# List the most common false-positive tokens among the errors (top 5).
ModelEvaluator.most_common_fp_tokens(errors, n=5)

In [None]:
# Build the false-positives table for the PERSON entity and show only the columns of interest.
fps_df = ModelEvaluator.get_fps_dataframe(errors, entity='PERSON')
fps_df[['full_text', 'token', 'prediction']]

In [None]:
# Build the false-negatives table for the PERSON entity and display it in full.
fns_df = ModelEvaluator.get_fns_dataframe(errors, entity='PERSON')
fns_df