In [None]:
!pip install presidio-anonymizer
!pip install presidio_analyzer
!python -m spacy download en_core_web_md
!pip install transformers


In [None]:
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine

In [None]:
# NLP engine settings: use spaCy with the medium English model.
configuration = {
    "nlp_engine_name": "spacy",
    "models": [
        {"lang_code": "en", "model_name": "en_core_web_md"},
    ],
}

In [None]:
# Build the NLP engine described by `configuration`, then create an analyzer
# that uses it and is restricted to English.
provider = NlpEngineProvider(nlp_configuration=configuration)
nlp_engine = provider.create_engine()
analyzer = AnalyzerEngine(
    nlp_engine=nlp_engine,
    supported_languages=["en"],
)

In [None]:
# Run the analyzer over a sample sentence and print every detection it returns.
example_text = "Hi my name is Hermit. Born in Hyderabad, India in 2023. My phone number is 91-9309209121.(dont call me). I ate Pizza last year"
results = analyzer.analyze(text=example_text, language="en")

for detection in results:
    print(detection)

type: LOCATION, start: 30, end: 39, score: 0.85
type: LOCATION, start: 41, end: 46, score: 0.85
type: DATE_TIME, start: 50, end: 54, score: 0.85
type: DATE_TIME, start: 117, end: 126, score: 0.85
type: PHONE_NUMBER, start: 75, end: 88, score: 0.75
type: US_BANK_NUMBER, start: 78, end: 88, score: 0.05
type: US_DRIVER_LICENSE, start: 78, end: 88, score: 0.01


In [None]:
# Analyze the sample sentence, then hand the text and the analyzer's findings
# to the anonymizer.
example_text = "Hi my name is Hermit. Born in Hyderabad, India in 2023. My phone number is 91-9309209121.(dont call me). I ate Pizza last year"
results = analyzer.analyze(text=example_text, language="en")
anonymizer = AnonymizerEngine()
anonymized_text = anonymizer.anonymize(
    text=example_text,
    analyzer_results=results,
)

In [None]:
# Show the anonymization result: the rewritten text followed by the list of
# replaced items.
print(anonymized_text)

text: Hi my name is Hermit. Born in <LOCATION>, <LOCATION> in <DATE_TIME>. My phone number is <PHONE_NUMBER>.(dont call me). I ate Pizza <DATE_TIME>
items:
[
    {'start': 131, 'end': 142, 'entity_type': 'DATE_TIME', 'text': '<DATE_TIME>', 'operator': 'replace'},
    {'start': 88, 'end': 102, 'entity_type': 'PHONE_NUMBER', 'text': '<PHONE_NUMBER>', 'operator': 'replace'},
    {'start': 56, 'end': 67, 'entity_type': 'DATE_TIME', 'text': '<DATE_TIME>', 'operator': 'replace'},
    {'start': 42, 'end': 52, 'entity_type': 'LOCATION', 'text': '<LOCATION>', 'operator': 'replace'},
    {'start': 30, 'end': 40, 'entity_type': 'LOCATION', 'text': '<LOCATION>', 'operator': 'replace'}
]



In [None]:
from transformers import pipeline



In [None]:
# Load the text-classification pipeline for the toxicity model and score a
# sample string; the cell's last expression is displayed as its output.
text = "test"
classifier = pipeline(
    "text-classification",
    model="tensor-trek/distilbert-toxicity-classifier",
)
classifier(text)

[{'label': 'NEUTRAL', 'score': 0.9991722106933594}]