In [1]:
!pip install "presidio-analyzer[transformers]"
!pip install presidio-anonymizer
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


# PII detection with Presidio (spaCy)

In [3]:
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

# Sample text to scan. It contains a person name, a street address and an
# email address. The first two lines end with a trailing space before the
# newline; it is spelled out here because the reported character offsets
# depend on it.
example = (
    "\n"
    "My name is Wolfgang and I live in 12 Oak Street, London, SW21 2UV. \n"
    "I'm having trouble connecting to linkedin.com. \n"
    "My email Wolfgang.hollang@gmail.com\n"
)

# Build the analyzer with its default configuration (a spaCy-backed NLP engine
# plus the built-in PII recognizers).
# NOTE(review): no model is passed here, so Presidio picks its own default
# spaCy model -- confirm it is the `en_core_web_sm` model downloaded earlier
# in this notebook, otherwise that download is unused.
analyzer = AnalyzerEngine()

# Restrict detection to the entity types this demo cares about.
entities_to_detect = ["PHONE_NUMBER", "PERSON", "LOCATION", "EMAIL_ADDRESS"]

# Each result carries the entity type, the start/end offsets into `example`,
# and a confidence score.
results = analyzer.analyze(text=example, entities=entities_to_detect, language="en")
print(results)

# Feed the analyzer findings to the anonymizer. No operators are configured,
# so the anonymizer's default handling is applied to each detected span.
anonymizer = AnonymizerEngine()
anonymized_text = anonymizer.anonymize(text=example, analyzer_results=results)

# Printing the result shows the anonymized text followed by the list of
# replacements that were made.
print(anonymized_text)

[type: EMAIL_ADDRESS, start: 126, end: 152, score: 1.0, type: PERSON, start: 12, end: 20, score: 0.85, type: LOCATION, start: 50, end: 56, score: 0.85]
text: 
My name is <PERSON> and I live in 12 Oak Street, <LOCATION>, SW21 2UV. 
I'm having trouble connecting to linkedin.com. 
My email <EMAIL_ADDRESS>

items:
[
    {'start': 130, 'end': 145, 'entity_type': 'EMAIL_ADDRESS', 'text': '<EMAIL_ADDRESS>', 'operator': 'replace'},
    {'start': 50, 'end': 60, 'entity_type': 'LOCATION', 'text': '<LOCATION>', 'operator': 'replace'},
    {'start': 12, 'end': 20, 'entity_type': 'PERSON', 'text': '<PERSON>', 'operator': 'replace'}
]

