In [None]:
from presidio_evaluator.data_generator import read_synth_dataset
from presidio_evaluator.evaluation import ModelError, Evaluator
from presidio_evaluator.models import BaseModel, PresidioAnalyzerWrapper
from collections import Counter

import pandas as pd

# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Lift pandas' display limits so wide result frames are shown without
# column truncation; None for the width lets pandas pick it up itself.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)

# Evaluate Presidio Analyzer
This notebook runs the `Evaluator` class, with Presidio's analyzer (wrapped by `PresidioAnalyzerWrapper`) as the model under evaluation, on top of synthetic data.

One can perform the following changes:
1. Replace the synthetic data creation with real data or with another type of synthetic data
2. Adapt the Presidio `AnalyzerEngine` to a specific engine with a different set of recognizers or configured to be used on different languages




#### A. Read dataset for evaluation

In [None]:
# Load the synthetic dataset from the repository's data folder (path is
# relative to this notebook's location).
input_samples = read_synth_dataset("../data/synth_dataset.txt")
print("Read {} samples".format(len(input_samples)))
# Show the first sample as a sanity check on what was loaded.
input_samples[0]

#### B. Descriptive statistics

In [None]:
def flatten(l):
    """Concatenate the items of every iterable in ``l`` into one flat list.

    Only one level of nesting is removed; order is preserved.
    """
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

# Tally how many annotated spans the dataset holds for each entity type,
# walking every span of every sample.
count_per_entity = Counter(
    span.entity_type
    for input_sample in input_samples
    for span in input_sample.spans
)
count_per_entity

#### C. Match the dataset's entity names with Presidio's entity names

In [None]:
# Maps each entity label used in the dataset (key) to the label Presidio is
# expected to report for it (value). Several dataset labels may collapse into
# a single Presidio label; mapping a label to "O" skips its evaluation.
presidio_entities_map = {
    "PERSON": "PERSON",
    "EMAIL_ADDRESS": "EMAIL_ADDRESS",
    "CREDIT_CARD": "CREDIT_CARD",
    "FIRST_NAME": "PERSON",
    "PHONE_NUMBER": "PHONE_NUMBER",
    "BIRTHDAY": "DATE_TIME",
    "DATE_TIME": "DATE_TIME",
    # Fixed: was mapped to "DOMAIN", which disagrees with the "URL" entry below
    # and is not a label Presidio emits, so DOMAIN spans could never match.
    "DOMAIN": "DOMAIN_NAME",
    "CITY": "LOCATION",
    "ADDRESS": "LOCATION",
    # NOTE(review): Presidio also documents an NRP label covering
    # nationalities - confirm that LOCATION is the intended target here.
    "NATIONALITY": "LOCATION",
    "LOCATION": "LOCATION",
    "IBAN": "IBAN_CODE",
    "URL": "DOMAIN_NAME",
    "US_SSN": "US_SSN",
    "IP_ADDRESS": "IP_ADDRESS",
    "ORGANIZATION": "ORG",
    "TITLE" : "O", # skipping evaluation of titles
    "O": "O",
}

# Relabel the samples' entity types through the map; the cells that follow
# work on this aligned list rather than on the raw input samples.
new_list = Evaluator.align_entity_types(input_samples, presidio_entities_map)

#### D. Recalculate statistics on updated dataset

In [None]:
# Recount spans per entity type, this time on the aligned samples, to check
# the effect of the label mapping.
count_per_entity_new = Counter(
    span.entity_type
    for input_sample in new_list
    for span in input_sample.spans
)
count_per_entity_new

#### E. Run the presidio-evaluator framework with Presidio's API as the 'model' under test

In [None]:
# Hand the wrapper the entity labels that occur in the aligned dataset
# (presumably it restricts Presidio's output to these - confirm in the wrapper).
presidio = PresidioAnalyzerWrapper(
    entities_to_keep=list(count_per_entity_new)
)
evaluator = Evaluator(model=presidio)

# Only the first 100 aligned samples are evaluated here. The variable name's
# spelling is kept as-is because a later cell reads it.
evaluted_samples = evaluator.evaluate_all(new_list[:100])

#### F. Extract statistics
- Precision, recall and F-measure are calculated based on a PII/Not PII binary classification per token.
- Specific entity recall and precision are calculated on the specific PII entity level.

In [None]:
# Turn the evaluated samples into the scores described in the section text.
evaluation_result = evaluator.calculate_score(evaluted_samples)

In [None]:
# Print the computed scores; the layout is decided by the result object's own print method.
evaluation_result.print()

#### G. Analyze wrong predictions

In [None]:
# Keep the model errors carried by the evaluation result for the analysis cells below.
errors = evaluation_result.model_errors

In [None]:
# Most common false-positive (fp) tokens among the errors; n=5 presumably caps how many are listed.
ModelError.most_common_fp_tokens(errors,n=5)

In [None]:
# Collect the false positives recorded for the PERSON entity as a DataFrame.
fps_df = ModelError.get_fps_dataframe(errors, entity='PERSON')
if fps_df is not None:
    # A bare expression nested inside an `if` is not rendered by the notebook
    # (only a cell's last top-level expression is), so display it explicitly.
    display(fps_df[['full_text', 'token', 'prediction']])

In [None]:
# Collect the false negatives recorded for the PERSON entity and show them as the cell output.
fns_df = ModelError.get_fns_dataframe(errors,entity='PERSON')
fns_df