<a href="https://colab.research.google.com/github/shahzadahmad3/Natural-Language-Processing/blob/main/NER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import re

# Sample sentence containing one e-mail address and one bare URL.
text = "Contact us at support@openai.com or visit www.openai.com."

# local-part @ domain . TLD (two or more letters).
# The TLD class is [A-Za-z]: inside a character class '|' is a literal pipe,
# not alternation, so the previous [A-Z|a-z] also accepted '|' characters.
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'

# findall returns every non-overlapping match as a list of strings.
emails = re.findall(email_pattern, text)
print(emails)


['support@openai.com']


In [None]:
!pip install spacy
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
import spacy

# Load the pretrained small English pipeline by its package name.
nlp = spacy.load('en_core_web_sm')
# Sample text
text = "Elon Musk founded SpaceX in 2002, headquartered in California."
doc = nlp(text)

# Print each detected entity with its label.
# `ent.label_` (trailing underscore) is the human-readable string such as
# "PERSON"; `ent.label` is the internal integer ID and prints as a bare number.
for ent in doc.ents:
    print(f"{ent.text}, {ent.label_}")

Elon Musk, 380
2002, 391
California, 384


In [None]:
# Visualise the entities of `doc` with spaCy's displacy module.
# style="ent" selects the entity view; jupyter=True asks displacy to
# display the result in the notebook.
from spacy import displacy

displacy.render(
    doc,
    style="ent",
    jupyter=True,
)

In [None]:
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk

# Fetch the NLTK data packages before tokenising, tagging and chunking.
# The order matches the original one-call-per-package sequence.
for resource in (
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker_tab',
    'words',
):
    nltk.download(resource)

# NOTE: `text` is not assigned in this cell; it is whatever an earlier cell
# last bound to that name.
# Pipeline: raw string -> tokens -> (token, POS tag) pairs -> chunk tree.
tokens = word_tokenize(text)
pos_tags = pos_tag(tokens)
tree = ne_chunk(pos_tags)

print(pos_tags)
print(tree)

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker_tab to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker_tab.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


[('Elon', 'NNP'), ('Musk', 'NNP'), ('founded', 'VBD'), ('SpaceX', 'NNP'), ('in', 'IN'), ('2002', 'CD'), (',', ','), ('headquartered', 'VBN'), ('in', 'IN'), ('California', 'NNP'), ('.', '.')]
(S
  (PERSON Elon/NNP)
  (PERSON Musk/NNP)
  founded/VBD
  (ORGANIZATION SpaceX/NNP)
  in/IN
  2002/CD
  ,/,
  headquartered/VBN
  in/IN
  (GPE California/NNP)
  ./.)


In [None]:
#Custom NER Model Training with spaCy
#Step 1: Create Training Data
# Each item is (text, {"entities": [(start_char, end_char, label), ...]}).
# end_char is exclusive, so text[start_char:end_char] must be exactly the
# entity string and must fall on token boundaries.
TRAIN_DATA=[
    ('Apple Inc. is a technology company.', {'entities': [(0, 10, 'ORG')]}),
    # Offsets: "Microsoft" = [0, 9), "LinkedIn" = [19, 27), "2016" = [31, 35).
    ('Microsoft acquired LinkedIn in 2016.', {'entities': [(0, 9, 'ORG'), (19, 27, 'ORG'), (31, 35, 'DATE')]}),
    # A sentence with no entities at all (empty annotation list).
    ('This is a sentence with no entities.', {'entities': []}),
]

#Step 2: Train a Custom NER Model
import spacy
from spacy.training import Example

# Load blank model
nlp = spacy.blank("en")

# Add a fresh NER component to the pipeline. add_pipe returns the component
# instance that actually lives in the pipeline, so labels registered on `ner`
# reach the model being trained (a separate create_pipe() object would not).
ner = nlp.add_pipe("ner", last=True)

# Add entity labels
for _, annotations in TRAIN_DATA:
    for ent in annotations.get("entities"):
        ner.add_label(ent[2])

# Sanity-check the annotations: char_span returns None when the offsets do
# not line up with token boundaries, which would make the span unusable.
for text, annotations in TRAIN_DATA:
    doc = nlp.make_doc(text)
    for start, end, label in annotations["entities"]:
        assert doc.char_span(start, end) is not None, (
            f"Misaligned entity {(start, end, label)!r} in {text!r}: "
            f"got {text[start:end]!r}"
        )

# Initialise the model weights. initialize() replaces the older
# begin_training() name and returns the optimizer used for the updates.
optimizer = nlp.initialize(
    lambda: [
        Example.from_dict(nlp.make_doc(sample_text), sample_annotations)
        for sample_text, sample_annotations in TRAIN_DATA
    ]
)

# Train the model
for _ in range(10):  # Training iterations
    for text, annotations in TRAIN_DATA:
        # Create Example object
        doc = nlp.make_doc(text) # Create a Doc object from text
        example = Example.from_dict(doc, annotations) # Create Example from Doc and annotations
        nlp.update([example], sgd=optimizer) # Update the model with the Example object


# Save the trained model
nlp.to_disk("custom_ner_model")