In [None]:
from presidio_analyzer import AnalyzerEngine, PatternRecognizer
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import AnonymizerConfig

# Analyze Text for PII Entities

<br>Using Presidio Analyzer, analyze a text to identify PII entities. 
<br>Presidio Analyzer uses pre-defined entity recognizers and also offers the option to create custom recognizers.

<br>The following code sample will:
<ol>
<li>Set up the Analyzer engine - load the NLP module (spaCy model by default) and other PII recognizers</li>
<li> Call analyzer to get analyzed results for "PHONE_NUMBER" entity type</li>
</ol>

In [None]:
# Sample sentence used throughout the notebook as the input to analyze and anonymize.
text_to_anonymize = (
    "His name is Mr. Jones and his phone number is 212-555-5555"
)

In [None]:
# Create the analyzer engine, then restrict this first pass to the
# "PHONE_NUMBER" entity type only.
analyzer = AnalyzerEngine()
analyzer_results = analyzer.analyze(
    text=text_to_anonymize,
    entities=["PHONE_NUMBER"],
    language="en",
)

print(analyzer_results)

# Create Custom PII Entity Recognizers

<br>Presidio Analyzer comes with a pre-defined set of entity recognizers. It also allows adding new recognizers without changing the analyzer base code,
by creating custom recognizers.
<br>In the following example, we will create two new recognizers of type `PatternRecognizer` to identify titles and pronouns in the analyzed text.
<br>A `PatternRecognizer` is a PII entity recognizer which uses regular expressions or deny-lists.

<br>The following code sample will:
<ol>
<li>Create custom recognizers</li>
<li>Add the new custom recognizers to the analyzer</li>
<li>Call analyzer to get results from the new recognizers</li>
</ol>


In [None]:
# Deny-list recognizer that tags honorific titles as "TITLE".
titles_recognizer = PatternRecognizer(
    supported_entity="TITLE",
    deny_list=["Mr.", "Mrs.", "Miss"],
)

# Deny-list recognizer that tags pronouns as "PRONOUN".
# Every entry needs its own comma: Python silently concatenates adjacent
# string literals, so `"hers" "Hers"` would collapse into the single entry
# "hersHers" and neither "hers" nor "Hers" would be in the list.
pronoun_recognizer = PatternRecognizer(
    supported_entity="PRONOUN",
    deny_list=["he", "He", "his", "His", "she", "She", "hers", "Hers"],
)

# Register both custom recognizers with the analyzer's registry.
analyzer.registry.add_recognizer(titles_recognizer)
analyzer.registry.add_recognizer(pronoun_recognizer)

# Ask only for the two new entity types to see what the custom recognizers find.
analyzer_results = analyzer.analyze(
    text=text_to_anonymize,
    entities=["TITLE", "PRONOUN"],
    language="en",
)
print(analyzer_results)


Call Presidio Analyzer without an entity filter to get results from all the configured recognizers - both the default ones and the new custom recognizers.

In [None]:
# No `entities` argument is passed in this call, unlike the earlier cells.
analyzer_results = analyzer.analyze(
    text=text_to_anonymize,
    language="en",
)

analyzer_results

# Anonymize Text with Identified PII Entities

<br>Presidio Anonymizer iterates over the Presidio Analyzer result, and provides anonymization capabilities for the identified text.
<br>The anonymizer provides 5 types of anonymizers: replace, redact, mask, hash and encrypt. The default is **replace**.

<br>The following code sample will:
<ol>
<li>Set up the anonymizer engine</li>
<li>Create an anonymizer request - text to anonymize, list of anonymizers to apply and the results from the analyzer request</li>
<li>Anonymize the text</li>
</ol>

In [None]:
anonymizer = AnonymizerEngine()

# Anonymization rules keyed by entity type, plus a "DEFAULT" entry.
anonymizers_config = {
    "DEFAULT": AnonymizerConfig("replace", {"new_value": "<ANONYMIZED>"}),
    "PHONE_NUMBER": AnonymizerConfig(
        "mask",
        {
            "type": "mask",
            "masking_char": "*",
            "chars_to_mask": 12,
            "from_end": True,
        },
    ),
    "TITLE": AnonymizerConfig("redact", {}),
}

# Apply the rules to the original text using the analyzer's findings.
anonymized_results = anonymizer.anonymize(
    text=text_to_anonymize,
    analyzer_results=analyzer_results,
    anonymizers_config=anonymizers_config,
)

print(anonymized_results)