# PROGRAM FOR NER - USING CLASSICAL ML MODEL

In [1]:
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline

In [2]:
# Toy labelled corpus: each entry is a (feature dict, entity label) pair, where
# the feature dict has a single 'word' feature holding the token text
training_data = [
    ({'word': word}, label)
    for word, label in [
        ('Jane', 'PERSON'),
        ('Villanueva', 'PERSON'),
        ('United', 'ORGANIZATION'),
        ('United Airlines Holding', 'ORGANIZATION'),
        ('Chicago', 'LOCATION'),
    ]
]

In [3]:
# Split the (features, label) pairs into two parallel lists: X holds the
# feature dicts and y holds the matching entity labels
X = [features for features, _ in training_data]
y = [label for _, label in training_data]

In [5]:
# Show the feature dicts followed by their labels to confirm the split
print(X, y)

[{'word': 'Jane'}, {'word': 'Villanueva'}, {'word': 'United'}, {'word': 'United Airlines Holding'}, {'word': 'Chicago'}] ['PERSON', 'PERSON', 'ORGANIZATION', 'ORGANIZATION', 'LOCATION']


In [7]:
# Chain feature extraction and classification so both are fitted together:
#   1. DictVectorizer turns each feature dict into a numeric feature vector.
#   2. LinearSVC is the linear classifier trained on those vectors.
# random_state is pinned because LinearSVC's solver can shuffle the data with a
# pseudo-random generator; a fixed seed keeps Restart & Run All reproducible.
pipeline = Pipeline([
    ('feature_extraction', DictVectorizer()),
    ('classifier', LinearSVC(random_state=42)),
])

In [8]:
# Fit the vectoriser and the classifier on the labelled examples.
# Pipeline.fit returns the pipeline itself, so `model` is the same fitted object.
pipeline.fit(X, y)
model = pipeline

In [20]:
# Two single-word feature dicts to classify. Both words also occur in
# training_data, so this checks that the model reproduces what it has seen
# rather than how well it generalises.
test_data_1 = {'word': 'Jane'}
test_data_2 = {'word': 'Chicago'}

# predict() takes a list of samples, so each dict is wrapped in a one-element list
prediction1, prediction2 = (
    model.predict([sample]) for sample in (test_data_1, test_data_2)
)

# Report the label predicted for each sample
for sample, predicted in ((test_data_1, prediction1), (test_data_2, prediction2)):
    print(f'Named Entity Label for {sample}:', predicted[0])

Named Entity Label for {'word': 'Jane'}: PERSON
Named Entity Label for {'word': 'Chicago'}: LOCATION


# PROGRAM FOR NER - USING MODERN ML MODEL

In [2]:
# Import spaCy, a free, open-source NLP library for Python that is designed to
# make it easy to build information-extraction and general-purpose NLP systems
import spacy
from spacy import displacy

In [3]:
# Load a model that is pre-trained on the English language; "sm" marks the
# small model variant, which is optimized for speed
SPACY_MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(SPACY_MODEL_NAME)

In [21]:
# Sample sentence to analyse
text = "We are student of NED University, Pakistan. Student of Batch 2021"

# Run the loaded spaCy pipeline over the sentence to get the processed document
doc = nlp(text)

In [22]:
# Collect (text, start character, end character, label) for every entity that
# was found in the document, then print the resulting list
ents = [
    (entity.text, entity.start_char, entity.end_char, entity.label_)
    for entity in doc.ents
]
print(ents)

[('NED University', 18, 32, 'ORG'), ('Pakistan', 34, 42, 'GPE'), ('2021', 61, 65, 'DATE')]


In [23]:
# Use displaCy to visualize the entities inline in the notebook
displacy.render(
    doc,
    style="ent",
    jupyter=True,
)