Evaluate Presidio Analyzer using the Presidio Evaluator framework

In [1]:
# Switch to the parent directory and list its contents; later cells read
# files through relative paths such as "data/...".
%cd ..
!ls

/Users/mattc/Desktop/presidio/presidio-research-fork
CODE_OF_CONDUCT.md              experiment_20220807-074747.json
LICENSE                         experiment_20220807-075704.json
NOTICE                          experiment_20220807-075836.json
README.md                       [34mnotebooks[m[m
SECURITY.md                     [34mpresidio_evaluator[m[m
VERSION                         pytest.ini
azure-pipelines.yml             requirements.txt
[34mdata[m[m                            requirements_all.txt
[34mdocs[m[m                            setup.cfg
experiment_20220807-073124.json setup.py
experiment_20220807-073416.json [34mtests[m[m
experiment_20220807-074044.json [34mvenv[m[m


In [2]:
from pathlib import Path
from copy import deepcopy
from pprint import pprint
from collections import Counter

from presidio_evaluator import InputSample
from presidio_evaluator.evaluation import Evaluator, ModelError
from presidio_evaluator.models import PresidioAnalyzerWrapper
from presidio_evaluator.experiment_tracking import get_experiment_tracker

import pandas as pd

# Lift pandas' display limits so DataFrames are shown without truncating
# columns, rows, or long cell contents.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

# Enable the IPython autoreload extension (mode 2) so edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

stanza and spacy_stanza are not installed
Flair is not installed by default
Flair is not installed


Select data for evaluation:

In [4]:
# Alternative dataset, currently disabled:
# dataset_name = "synth_dataset_v2.json"
# dataset = InputSample.read_dataset_json(f"data/{dataset_name}")
# print(len(dataset))

# Load the evaluation samples from the JSON file under data/ and report how
# many were read. `dataset_name` is reused later when logging experiment params.
dataset_name = "generated_size_8_date_August_07_2022.json"
dataset = InputSample.read_dataset_json(f"data/{dataset_name}")
print(len(dataset))

tokenizing input:   0%|          | 0/8 [00:00<?, ?it/s]

loading model en_core_web_sm


tokenizing input: 100%|██████████| 8/8 [00:00<00:00, 16.60it/s]

8





In [5]:
# Tally how often each tag occurs across all samples in the dataset.
entity_counter = Counter(tag for sample in dataset for tag in sample.tags)

In [6]:
# Summarise the dataset: tag frequencies, one example sample, and size ranges.
print("Count per entity:")
pprint(entity_counter.most_common())

print("\nExample sentence:")
print(dataset[1])

# Range of token counts per sample.
print("\nMin and max number of tokens in dataset:")
token_counts = [len(sample.tokens) for sample in dataset]
print(f"Min: {min(token_counts)}, Max: {max(token_counts)}")

# Range of len(full_text) per sample.
print("\nMin and max sentence length in dataset:")
text_lengths = [len(sample.full_text) for sample in dataset]
print(f"Min: {min(text_lengths)}, Max: {max(text_lengths)}")

Count per entity:
[('O', 97), ('DATE_TIME', 30), ('PERSON', 8), ('ORGANIZATION', 1)]

Example sentence:
Full text: <b>Comparison:</b>  2020-05-27 13:16:41<br /><br />
Spans: [Type: DATE_TIME, value: 2020-05-27 13:16:41, start: 20, end: 39]
Tokens: <b>Comparison:</b>  2020-05-27 13:16:41<br /><br />
Tags: ['O', 'O', 'O', 'O', 'O', 'O', 'DATE_TIME', 'DATE_TIME', 'DATE_TIME', 'DATE_TIME', 'DATE_TIME', 'DATE_TIME', 'O', 'O', 'O', 'O', 'O']


Min and max number of tokens in dataset:
Min: 6, Max: 25

Min and max sentence length in dataset:
Min: 40, Max: 129


In [7]:
# Display the first sample in the dataset as the cell output.
dataset[0]

Full text: <br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />
Spans: [Type: DATE_TIME, value: 1989-06-24 21:44:24, start: 82, end: 101, Type: PERSON, value: Annikki Järventaus, start: 59, end: 77, Type: PERSON, value: Bence Földessi, start: 44, end: 58]
Tokens: <br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />
Tags: ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'PERSON', 'PERSON', 'PERSON', 'PERSON', 'O', 'O', 'DATE_TIME', 'DATE_TIME', 'DATE_TIME', 'DATE_TIME', 'DATE_TIME', 'DATE_TIME', 'O', 'O', 'O', 'O', 'O']

Run evaluation:

In [8]:
# Evaluate the Presidio Analyzer wrapper on the dataset, log the run through
# the experiment tracker, and print the confusion matrix and scores.
print("Evaluating Presidio Analyzer")

experiment = get_experiment_tracker()
model_name = "Presidio Analyzer"
model = PresidioAnalyzerWrapper()

evaluator = Evaluator(model=model)
# Align entity types using the wrapper's entity mapping. A deep copy is passed
# in, so the originally loaded sample objects are not handed over directly.
# NOTE(review): `dataset` is rebound to the aligned result, so re-running this
# cell feeds already-aligned samples back in — confirm that is idempotent.
dataset = Evaluator.align_entity_types(
    deepcopy(dataset), entities_mapping=PresidioAnalyzerWrapper.presidio_entities_map
)

# Run the model over every sample, then aggregate the per-sample results.
evaluation_results = evaluator.evaluate_all(dataset)
results = evaluator.calculate_score(evaluation_results)

# Record the run: dataset/model names plus whatever the model reports via
# to_log(), a hash of the dataset, the metrics, and the confusion matrix.
params = {"dataset_name": dataset_name, "model_name": model_name}
params.update(model.to_log())
experiment.log_parameters(params)
experiment.log_dataset_hash(dataset)
experiment.log_metrics(results.to_log())
entities, confmatrix = results.to_confusion_matrix()
experiment.log_confusion_matrix(matrix=confmatrix, labels=entities)

# The same entity labels are used for both rows and columns of the matrix.
print("Confusion matrix:")
print(pd.DataFrame(confmatrix, columns=entities, index=entities))

print("Precision and recall")
print(results)

# Close the experiment once everything has been logged.
experiment.end()

Evaluating Presidio Analyzer
Entities supported by this Presidio Analyzer instance:
US_ITIN, US_PASSPORT, IP_ADDRESS, AU_ACN, NRP, IBAN_CODE, US_BANK_NUMBER, AU_TFN, PHONE_NUMBER, CREDIT_CARD, AU_ABN, MEDICAL_LICENSE, EMAIL_ADDRESS, UK_NHS, LOCATION, CRYPTO, US_DRIVER_LICENSE, US_SSN, DATE_TIME, PERSON, URL, AU_MEDICARE, SG_NRIC_FIN


Evaluating <class 'presidio_evaluator.models.presidio_analyzer_wrapper.PresidioAnalyzerWrapper'>: 100%|██████████| 8/8 [00:00<00:00, 78.20it/s]

Confusion matrix:
           DATE_TIME   O  PERSON
DATE_TIME         28   2       0
O                  5  92       1
PERSON             0   2       6
Precision and recall
              Entity           Precision              Recall   Number of samples
           DATE_TIME              84.85%              93.33%                  30
              PERSON              85.71%              75.00%                   8
                 PII              85.00%              89.47%                  38
PII F measure: 88.83%
saving experiment data to experiment_20220807-080215.json





### Results analysis

In [9]:
# Spot-check the model on a single hand-written sentence.
sent = "Electronically Signed by Matthew Chung."
# Uncomment to type a sentence interactively instead:
# sent = input("Enter sentence: ")
model.predict(InputSample(full_text=sent))

['O', 'O', 'O', 'PERSON', 'PERSON', 'O']

In [10]:
# Model errors recorded on the evaluation results; input to the error analysis.
errors = results.model_errors

#### False positives

1. Most common false positive tokens:

In [11]:
# Report the most common false positive tokens among the errors.
ModelError.most_common_fp_tokens(errors)

Most common false positive tokens:
[('Petersons', 1), ('earlier', 1), ('same', 1), ('day', 1), ('1007', 1), ('hrs', 1)]
Example sentence with each FP token:
The procedure was performed at Petersons
PREVIOUS REPORT: HISTORY: Subcutaneous emphysema. Comparison chest radiograph earlier same day, 1007 hrs and 1972-07-19 01:01:18.
PREVIOUS REPORT: HISTORY: Subcutaneous emphysema. Comparison chest radiograph earlier same day, 1007 hrs and 1972-07-19 01:01:18.
PREVIOUS REPORT: HISTORY: Subcutaneous emphysema. Comparison chest radiograph earlier same day, 1007 hrs and 1972-07-19 01:01:18.
PREVIOUS REPORT: HISTORY: Subcutaneous emphysema. Comparison chest radiograph earlier same day, 1007 hrs and 1972-07-19 01:01:18.
PREVIOUS REPORT: HISTORY: Subcutaneous emphysema. Comparison chest radiograph earlier same day, 1007 hrs and 1972-07-19 01:01:18.


In [12]:
# fps_df = ModelError.get_fps_dataframe(errors, entity=["LOCATION"])
# fps_df[["full_text", "token", "prediction"]].head()

2. False negative examples

In [13]:
# Report the most common false negative tokens, restricted to PERSON (n=50).
ModelError.most_common_fn_tokens(errors, n=50, entity=["PERSON"])

[('Bence', 1), ('Földessi', 1)]
Token: Bence, Annotation: PERSON, Full text: <br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />
Token: Földessi, Annotation: PERSON, Full text: <br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />


More false negative (FN) analysis:

In [14]:
# Collect the PERSON false negatives into a DataFrame for inspection.
fns_df = ModelError.get_fns_dataframe(errors, entity=["PERSON"])

In [15]:
# Show the first rows, limited to the columns relevant for error review.
fns_df[["full_text", "token", "annotation", "prediction"]].head()

Unnamed: 0,full_text,token,annotation,prediction
0,<br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />,Bence,PERSON,O
1,<br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />,Földessi,PERSON,O


In [16]:
# Print at most the first 10 errors. The slice is applied to `errors` before
# iterating, so the limit actually bounds what is printed. A plain loop is used
# instead of a list comprehension so the cell does not echo a list of `None`
# values as its result.
print("All errors:\n")
for error in errors[:10]:
    print(error, "\n")

All errors:

type: FN, Annotation = PERSON, prediction = O, Token = Bence, Full text = <br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />, Metadata = None 

type: FN, Annotation = PERSON, prediction = O, Token = Földessi, Full text = <br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />, Metadata = None 

type: FN, Annotation = DATE_TIME, prediction = O, Token = 21:44:24, Full text = <br /><br /><br />Electronically signed by: Bence Földessi Annikki Järventaus DO (1989-06-24 21:44:24 <br /><br />, Metadata = None 

type: FP, Annotation = O, prediction = PERSON, Token = Petersons, Full text = The procedure was performed at Petersons, Metadata = None 

type: FP, Annotation = O, prediction = DATE_TIME, Token = earlier, Full text = PREVIOUS REPORT: HISTORY: Subcutaneous emphysema. Comparison chest radiograph earlier same day, 1007 hrs and 1972-07-19 01:01:18., Metada

[None, None, None, None, None, None, None, None, None, None]