In [0]:
pip install presidio-analyzer

In [0]:
pip install presidio-anonymizer

In [0]:
from typing import List
import pprint

from presidio_analyzer import AnalyzerEngine, PatternRecognizer, EntityRecognizer, Pattern, RecognizerResult
from presidio_analyzer.recognizer_registry import RecognizerRegistry
from presidio_analyzer.nlp_engine import NlpEngine, SpacyNlpEngine, NlpArtifacts

In [0]:
# Define the regex pattern in a Presidio `Pattern` object.
# The regex is written as a raw string so that `\.` reaches the regex engine
# as a literal-dot escape instead of being an invalid Python string escape.
Email_pattern = Pattern(
    name="Email_pattern",
    regex=r"[a-zA-Z0-9+._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+",
    score=0.5,
)

# Define the recognizer with one or more patterns. Matches are reported under
# the entity type "Email" (case-sensitive).
Email_recognizer = PatternRecognizer(supported_entity="Email", patterns=[Email_pattern])

In [0]:
# Sample sentence containing two e-mail addresses for the recognizer to find.
myemail = "Feel free to mail me the issue at lakshay.sharma@rani.ai or to the our head aparnesh.gaurav@rani.ai"

# Run the custom recognizer, restricted to the "Email" entity type.
Email_result = Email_recognizer.analyze(entities=["Email"], text=myemail)

# Show the matches, then the type of the returned container, one per line.
print("Result:", Email_result, type(Email_result), sep="\n")

In [0]:
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities.engine import AnonymizerResult, OperatorConfig

# Create the anonymizer engine with its default configuration.
engine = AnonymizerEngine()

# Anonymize the text using the analyzer results. Operators are keyed by the
# entity type carried on each result, and the lookup is case-sensitive.
# `Email_result` was produced for the "Email" entity, so the key must be
# exactly "Email"; a key such as "EMAIL" would never match and the engine
# would silently fall back to its default operator.
result = engine.anonymize(
    text=myemail,
    analyzer_results=Email_result,
    operators={"Email": OperatorConfig("replace", {"new_value": "EMAIL_ID"})},
)

print(result.text)

In [0]:
pip install spacy

In [0]:
import spacy

In [0]:
# Download the small English pipeline from Python rather than through a shell
# magic, so the cell does not depend on which `python` the shell resolves to.
from spacy.cli import download

download("en_core_web_sm")

In [0]:
# Load spaCy's small English pipeline; the cells below reuse this `nlp` object.
import spacy

nlp = spacy.load("en_core_web_sm")

In [0]:
def show_ents(doc):
    """Print every named entity found in `doc`.

    One line is printed per entity: its text, start and end character
    offsets, and label. If the document has no entities, a single
    explanatory line is printed instead.
    """
    if not doc.ents:
        print("No named entities found.")
        return
    for ent in doc.ents:
        print(f"{ent.text} - {ent.start_char} - {ent.end_char} - {ent.label_}")


# Baseline: what the loaded pipeline recognises before any extra training.
doc1 = nlp("hello I am taking disprin and my age is 25 and i am having bipolar disorder")
show_ents(doc1)

In [0]:
# Fetch the pipeline's "ner" component so later cells can work with it directly.
ner = nlp.get_pipe("ner")

In [0]:
# Training examples as (text, {"entities": [(start_char, end_char, label)]}).
# Offsets are character positions with an exclusive end, and each span must
# cover exactly the annotated word(s): "hello i am taking " is 18 characters
# long, "and i am having " is 16, and "and my age is " is 14.
TRAIN_DATA = [
    ("hello i am taking disprin", {"entities": [(18, 25, "MEDICINE")]}),
    ("hello i am taking paracetamol", {"entities": [(18, 29, "MEDICINE")]}),
    ("and i am having fever", {"entities": [(16, 21, "DISEASE")]}),
    ("and my age is 25 ", {"entities": [(14, 16, "AGE")]}),
    ("and my age is 32 ", {"entities": [(14, 16, "AGE")]}),
    # Span starts at 18 (not 19) so that it covers the whole word "Ibuprofen".
    ("hello i am taking Ibuprofen", {"entities": [(18, 27, "MEDICINE")]}),
    ("and i am having Cramps", {"entities": [(16, 22, "DISEASE")]}),
    ("and my age is 18 ", {"entities": [(14, 16, "AGE")]}),
    ("hello i am taking Acetaminophen", {"entities": [(18, 31, "MEDICINE")]}),
    ("and i am having cold", {"entities": [(16, 20, "DISEASE")]}),
    ("and my age is 13 ", {"entities": [(14, 16, "AGE")]}),
    ("hello i am taking naproxen ", {"entities": [(18, 26, "MEDICINE")]}),
    ("and my age is 15 ", {"entities": [(14, 16, "AGE")]}),
    ("and i am having headache", {"entities": [(16, 24, "DISEASE")]}),
    ("and my age is 22 ", {"entities": [(14, 16, "AGE")]}),
    ("hello i am taking aspirin ", {"entities": [(18, 25, "MEDICINE")]}),
    ("and i am having bipolar disorder", {"entities": [(16, 32, "DISEASE")]}),
    ("and my age is 26 ", {"entities": [(14, 16, "AGE")]}),
    ("hello i am taking disprin", {"entities": [(18, 25, "MEDICINE")]}),
]

In [0]:
# Register every label used in the training data on the NER component,
# echoing each one as it is added.
for _, annotations in TRAIN_DATA:
    for _start, _end, label in annotations.get("entities"):
        print(label)
        ner.add_label(label)

In [0]:
# Components listed here are left enabled; every other pipe in the pipeline
# is collected into `unaffected_pipes` so it can be disabled later.
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = list(
    filter(lambda name: name not in pipe_exceptions, nlp.pipe_names)
)

In [0]:
# Import requirements
import random
from spacy.training import Example
from spacy.util import minibatch, compounding
from pathlib import Path

# TRAINING THE MODEL
# Every pipe named in `unaffected_pipes` is disabled for the duration of the
# `with` block, so only the remaining components are updated.
with nlp.disable_pipes(*unaffected_pipes):

    # Training for 30 iterations
    for iteration in range(30):

        # Shuffle the examples before every iteration (in place).
        random.shuffle(TRAIN_DATA)
        losses = {}

        # Batch up the examples using spaCy's minibatch helper.
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            # Build a fresh list for each batch and update once per batch, so
            # every example is used exactly once per iteration. Appending to a
            # single list and updating after each append would re-train on all
            # earlier examples again and again.
            examples = [
                Example.from_dict(nlp.make_doc(text), annotations)
                for text, annotations in batch
            ]
            nlp.update(examples, drop=0.5, losses=losses)

        # `losses` accumulates across the batches of this iteration.
        print("Losses", losses)

In [0]:
# Run the pipeline on a sentence that mixes a medicine, an age and a disease,
# then list the (text, label) pair of every entity it reports.
test = "hello I am taking disprin and my age is 25 and i am having bipolar disorder"
doc = nlp(test)
print("Entities", [(span.text, span.label_) for span in doc.ents])
print(doc.text)