In [32]:
from presidio_analyzer import AnalyzerEngine, RecognizerRegistry, PatternRecognizer, PatternRecognizer, Pattern
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine

text = 'Hello my friend John Robertson 12348'
textfr = "Bonjour, je m'apelle Marie Jean 1234"
configuration = {
    "nlp_engine_name": "spacy",
    "models": [
        {"lang_code": "fr", "model_name": "fr_core_news_md"},
        {"lang_code": "en", "model_name": "en_core_web_sm"},
    ],
}
provider = NlpEngineProvider(nlp_configuration=configuration)
fr_provider = provider.create_engine()



greet_pattern = PatternRecognizer(supported_entity="GREET",
                                      deny_list=["Hello","Bonjour"],
                                      supported_language=["en", "fr"])
numbers = Pattern(name="numbers", regex="[0-9]{5}", score=0.85)
number_pattern = PatternRecognizer(supported_entity="NUMBERS",
                                      patterns=[numbers],
                                      supported_language=["en", "fr"])

recognizer_regitry = RecognizerRegistry(supported_languages=['en','fr'])
recognizer_regitry.load_predefined_recognizers()

recognizer_regitry.add_recognizer(greet_pattern)
recognizer_regitry.add_recognizer(number_pattern)

analyzer = AnalyzerEngine(
    registry=recognizer_regitry,
    nlp_engine=fr_provider, 
    supported_languages=['en','fr']
    )


results = analyzer.analyze(text=text, language='en')
anonimyzer = AnonymizerEngine()
results = anonimyzer.anonymize(text=text,analyzer_results=results)
print(text)
print(results.text)


Hello my friend John Robertson 12348
Hello my friend <PERSON> <DATE_TIME>


In [5]:
from presidio_analyzer import AnalyzerEngine, RecognizerRegistry, PatternRecognizer, Pattern
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine

text = 'Hello my friend John Robertson 12348'
# textfr = "Bonjour, je m'apelle Marie Jean 1234"

greet_pattern = PatternRecognizer(supported_entity="GREET",
                                      deny_list=["Hello","Bonjour"],
                                      supported_language='en')
numbers = Pattern(name="numbers", regex="[0-9]{5}", score=0.85)

number_pattern = PatternRecognizer(supported_entity="NUMBERS",
                                      patterns=[numbers],
                                      supported_language='en')

recognizer_regitry = RecognizerRegistry(supported_languages='en')

recognizer_regitry.add_recognizer(greet_pattern)
recognizer_regitry.add_recognizer(number_pattern)

analyzer = AnalyzerEngine(
    registry=recognizer_regitry,
    supported_languages='en'
    )


results = analyzer.analyze(text=text, language='en')
anonimyzer = AnonymizerEngine()
results = anonimyzer.anonymize(text=text,analyzer_results=results)
print(text)
print(results.text)


Hello my friend John Robertson 12348
<GREET> my friend John Robertson <NUMBERS>


In [16]:
from presidio_analyzer import AnalyzerEngine, RecognizerRegistry, PatternRecognizer, Pattern
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine

# text = 'Hello my friend John Robertson 12348'
textfr = "Bonjour, je m'apelle 58 Marie Guy a Paris 12349"

configuration = {
    "nlp_engine_name": "spacy",
    "models": [
        {"lang_code": "fr", "model_name": "fr_core_news_md"},
        {"lang_code": "en", "model_name": "en_core_web_sm"},
    ],
}

provider = NlpEngineProvider(nlp_configuration=configuration)
fr_provider = provider.create_engine()

greet_pattern = PatternRecognizer(supported_entity="GREET",
                                      deny_list=["Hello","Bonjour"],
                                      supported_language='fr')

numbers_5 = Pattern(name="numbers", regex="[0-9]{5}", score=1)
numbers_2 = Pattern(name="numbers", regex="[0-9]{2}", score=1)

number_pattern = PatternRecognizer(supported_entity="NUMBERS",
                                      patterns=[numbers, numbers_2],
                                      supported_language='fr')

recognizer_regitry = RecognizerRegistry(supported_languages='fr')
recognizer_regitry.load_predefined_recognizers()
recognizer_regitry.add_recognizer(greet_pattern)
recognizer_regitry.add_recognizer(number_pattern)

analyzer = AnalyzerEngine(
    # registry=recognizer_regitry,
    nlp_engine=fr_provider, 
    supported_languages='fr'
    )


results = analyzer.analyze(text=textfr, language='fr')
anonimyzer = AnonymizerEngine()
results = anonimyzer.anonymize(text=textfr,analyzer_results=results)
print(textfr)
print(results.text)

Bonjour, je m'apelle 58 Marie Guy a Paris 12349
Bonjour, je m'apelle 58 <PERSON> a <LOCATION> 12349


In [12]:
from presidio_analyzer import AnalyzerEngine, RecognizerRegistry, PatternRecognizer, Pattern
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine

# Sample text
text_fr = "Bonjour, je m'appelle 58 Marie Guy à Paris 12349"
text_eng = "Hello, my name is Steve Rogers, 58 I am Captain America from Washington 12349."

# NLP engine configuration
configuration = {
    "nlp_engine_name": "spacy",
    "models": [
        # {"lang_code": "fr", "model_name": "fr_core_news_md"},
        {"lang_code": "en", "model_name": "en_core_web_sm"},
    ],
}

provider = NlpEngineProvider(nlp_configuration=configuration)
fr_provider = provider.create_engine()

# Custom recognizers
greet_pattern_fr = PatternRecognizer(
    supported_entity="GREET",
    deny_list=["Bonjour"],
    supported_language="fr"
)

greet_pattern_en = PatternRecognizer(
    supported_entity="GREET",
    deny_list=["Hello"],
    supported_language="en"
)


numbers_5 = Pattern(name="numbers_5", regex="[0-9]{5}", score=1)
numbers_2 = Pattern(name="numbers_2", regex="[0-9]{2}", score=1)

number_pattern = PatternRecognizer(
    supported_entity="NUMBERS",
    patterns=[numbers_5, numbers_2],
    supported_language="en"
)

# Initialize the RecognizerRegistry and load predefined recognizers
recognizer_registry = RecognizerRegistry(supported_languages=["en"])
recognizer_registry.load_predefined_recognizers()

# Add custom recognizers
recognizer_registry.add_recognizer(greet_pattern_en)
recognizer_registry.add_recognizer(number_pattern)

# Create AnalyzerEngine with both Spacy NER and custom recognizers
analyzer = AnalyzerEngine(
    registry=recognizer_registry,
    nlp_engine=fr_provider,
    supported_languages=["en"]
)

# Analyze and anonymize
results = analyzer.analyze(text=text_eng, language="en")
anonymizer = AnonymizerEngine()
anonymized_text = anonymizer.anonymize(text=text_eng, analyzer_results=results)

# Output
print("Original:", text_eng)
print("Anonymized:", anonymized_text.text)
print(results)

Original: Hello, my name is Steve Rogers, 58 I am Captain America from Washington 12349.
Anonymized: <GREET>, my name is <PERSON>, <NUMBERS> I am Captain America from <LOCATION> <NUMBERS>.
[type: GREET, start: 0, end: 5, score: 1.0, type: NUMBERS, start: 32, end: 34, score: 1, type: NUMBERS, start: 72, end: 77, score: 1, type: PERSON, start: 18, end: 30, score: 0.85, type: DATE_TIME, start: 32, end: 34, score: 0.85, type: LOCATION, start: 61, end: 71, score: 0.85, type: DATE_TIME, start: 72, end: 77, score: 0.85, type: IN_PAN, start: 61, end: 71, score: 0.05]


In [5]:
analyzer.supported_languages

['fr', 'en']

In [None]:
a