# Resume Parser using Spacy

In [36]:
# !pip install pdfminer.six

In [37]:
import spacy
import random
from spacy.training.example import Example
from spacy.util import minibatch, compounding
from pathlib import Path

In [38]:
from pdfminer.high_level import extract_text

def extract_text_from_pdf(pdf_path):
    return extract_text(pdf_path)

In [39]:
def parse_resume(text):
    doc = nlp(text)

    # Extract information based on spaCy's NER (Named Entity Recognition) capabilities
    entities = [(ent.text, ent.label_) for ent in doc.ents]

    return entities

In [40]:
nlp = spacy.load("en_core_web_sm")
ner = nlp.get_pipe('ner')
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [41]:
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
unaffected_pipes

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer']

In [42]:
# Add the new label to ner
Label1 = "TENURE"
Label2 = "SKILLS"
Label3 = "COURSE"
Label4 = "INVOLVEMENT"
Label5 = "EXPERIENCE"
Label6 = "SOCIALS"
Label7 = "EDUCATION"
# Label8 = "ORG" # for education and work institutions

# label=[]
# for i in range(1, 6):
#     label.append(globals()[f"Label{i}"])

for i in range(1,8):
    ner.add_label(globals()[f"Label{i}"])

# # Resume training
optimizer = nlp.resume_training()
move_names = list(ner.move_names)

In [43]:
ner.labels

('CARDINAL',
 'COURSE',
 'DATE',
 'EDUCATION',
 'EVENT',
 'EXPERIENCE',
 'FAC',
 'GPE',
 'INVOLVEMENT',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'SKILLS',
 'SOCIALS',
 'TENURE',
 'TIME',
 'WORK_OF_ART')

In [44]:
resume = """Rupesh Ghimire
rupeshghimire007@gmail.com | +977-9868155925

EDUCATION
Pashchimanchal Campus, IOE, TU
Bachelor in Computer Engineering
2019-Present
Fusemachines
Micro-Degree in Artificial Intelligence
2023-Present
Fellowship Scholar
LINKS
Github:// rupeshghimire7
LinkedIn:// rupesh-ghimire7
Medium:// rupeshghimire7
LeetCode:// rupeshghimire7
INVOLVEMENTS
NAAMI | ANAIS-Student Ambassador
Apr 2023 - Jun 2023
i-CES | Django Mentor
Jan 2023 - Feb 2023 | Pokhara
Made attendees familiar with Python and Django's MVC
architecture, ORM, Templates and Rest Framework.
Coding Competition (GCES) - 2023
Code with Coffee (i-CES) - 2022
PROJECTS
LIVER CIRRHOSIS PREDICTION
Fusemachines ML Final Project | (Feb 2023- Apr 2023)
Worked on the multiclass classification problem with
various classifiers to predict the stage of patients' liver.
Deployed on Flask.
BCT Study Room
Software Engineering Project | (May 2022- Jul 2022)
A django web-app to enable discussion in groups.
BookSuggestor
COURSEWORKSDBMS Project
Leveraged RAW SQL for database management using
MySQL in Django bypassing Django's ORM.
UNDERGRADUATETelegram Chat Bot
Data Structures & Algorithms
Operating Systems
Database Management System
Software Engineering
Artificial Intelligence
Computer NetworksPersonal Project
Used Telegram API for configuration and python for
implementation.
Machine Learning AlgorithmsACHIEVEMENTS
FUSEMACHINES
PROGRAMMING SKILLS
EXPERIENCED
Python | Django
Tailwind CSS | HTML5 | CSS3
INTERMEDIATE
Pytorch
MySQL
Pandas | Numpy
Matplotlib | Seaborn | Scikit-Learn
RESTful API
C/C++
FAMILIAR
Tensorflow/ Keras
JavaScript | ReactJS
ML Projects
Personal Project
Collection of Regression and Classification projects.
HultPrize 2021
To build viable food enterprises to create jobs, stimulate
economies, reimagine supply chains, and improve outcomes for
10,000,000 people by 2030.
OnCampus: 1st Runner Up
Regional Summit: Participant via Wildcard
Golden Jubilee Scholarship Scheme
Awarded from Embassy of India, Kathmandu for
Undergrad Studies.
LANGUAGES
Nepali - Native Proficiency
English - Professional Working Proficiency"""

In [45]:
resume = resume.replace('\n', ' ').replace('|', '')

### TRAINER FUNCTION

Takes list train data of form:
        
        [("Pizza is a common fast food.", {"entities": [(0, 5, "FOOD")]}),
        ("Pasta is an italian recipe", {"entities": [(0, 5, "FOOD")]})]
        
Resume Doc is spacy doc object of resume:

In [78]:
def trainer(DATA,epoch=10):
    
    # TRAINING THE MODEL
    with nlp.disable_pipes(*unaffected_pipes):

        # Training for n iterations
        for iteration in range(epoch):

            # Shuffling examples before every iteration
            random.shuffle(DATA)
            losses = {}

            # Batch up the examples using spaCy's minibatch
            batches = minibatch(DATA, size=compounding(4.0, 32.0, 1.001))

            for batch in batches:
                examples = []
                for text, annotation in batch:
                    example = Example.from_dict(nlp.make_doc(text), annotation)
                    examples.append(example)

                nlp.update(
                    examples,  
                    sgd=optimizer,
                    drop=0.5,  
                    losses=losses,
                )

            print("Losses", losses)

### Data Generator function

Label -> "SKILLS"

List of data -> ['React', 'Django', 'Machine Learning', 'AI']  

In [47]:
def data_generator(label,data_list,level=None):
    training_data = []

    for i in range(len(data_list)):
        data = data_list[i]
        start_index = 0
        end_index = start_index + len(data)
        training_data.append((data, {"entities": [(start_index, end_index, label)]}))

    return training_data

### View Results

In [48]:
def view_results(doc):
    for ent in doc.ents:
        print(f"{ent.label_}: {ent.text}")

### Creating data for tecnhical skills i.e. Entity Label : SKILLS

In [49]:
technical_skills = [
    # Programming Languages
    "Python", "Java", "C++", "JavaScript", "HTML", "CSS", "Ruby", "Go", "Swift", "Kotlin",
    "TypeScript", "Rust", "Scala", "PHP", "C#", "Objective-C",

    # Web Development
    "Django", "Flask", "Node.js", "Express.js", "React", "Angular", "Vue.js", "Next.js",
    "Spring Boot", "Ruby on Rails", "ASP.NET", "Meteor", "HTML5", "CSS3", "Bootstrap",

    # Databases
    "SQL", "MySQL", "PostgreSQL", "MongoDB", "Redis", "SQLite", "Firebase", "Cassandra",

    # Machine Learning / Data Science
    "TensorFlow", "PyTorch", "Scikit-learn", "Keras", "Pandas", "NumPy", "Matplotlib",
    "Seaborn", "NLTK", "Spacy", "Scrapy", "Beautiful Soup",

    # Cloud Computing
    "AWS", "Azure", "Google Cloud Platform (GCP)", "Docker", "Kubernetes", "Heroku",

    # DevOps
    "Jenkins", "Travis CI", "GitLab CI", "Ansible", "Terraform", "Docker Compose",

    # Version Control
    "Git", "GitHub", "Bitbucket", "GitLab",

    # Mobile Development
    "React Native", "Flutter", "Xamarin", "SwiftUI", "Android SDK",

    # Frameworks
    "React", "Angular", "Vue.js", "Django", "Flask", "Ruby on Rails", "Spring Boot", "Express.js",

    # Libraries
    "Pandas", "NumPy", "Matplotlib", "Seaborn", "NLTK", "Spacy", "Scrapy", "Beautiful Soup",

    # Frontend Technologies
    "React", "Angular", "Vue.js", "Next.js", "TypeScript", "Webpack", "Babel", "SASS",

    # Backend Technologies
    "Node.js", "Django", "Flask", "Spring Boot", "Ruby on Rails", "Express.js", "PHP",

    # Mobile Frameworks
    "React Native", "Flutter", "Xamarin", "SwiftUI", "Android SDK",

    # Networking
    "TCP/IP", "HTTP/HTTPS", "DNS", "Load Balancing", "Firewalls", "Proxy Servers",

    # Security
    "Cybersecurity", "Penetration Testing", "Cryptography", "OWASP", "SSL/TLS",

    # Operating Systems
    "Linux", "Windows", "macOS", "Unix",

    # Other Technologies
    "Blockchain", "Serverless", "Microservices", "RESTful API", "GraphQL",

    # Project Management / Agile
    "Scrum", "Kanban", "Agile", "JIRA", "Trello",

    # Miscellaneous
    "Data Manipulation", "Natural Language Processing (NLP)", "Computer Vision",
]
level = ['beginner', 'intermediate', 'expert']

In [50]:
skills_DATA = data_generator("SKILLS",technical_skills, level)

for data in TRAIN_DATA[:5]:
    print(data)
for data in TRAIN_DATA[-5:]:
    print(data)
print("TRAIN_DATA length", len(TRAIN_DATA))

('Computer Vision', {'entities': [(0, 15, 'SKILLS')]})
('Android SDK', {'entities': [(0, 11, 'SKILLS')]})
('Android SDK', {'entities': [(0, 11, 'SKILLS')]})
('Unix', {'entities': [(0, 4, 'SKILLS')]})
('NumPy', {'entities': [(0, 5, 'SKILLS')]})
('macOS', {'entities': [(0, 5, 'SKILLS')]})
('CSS', {'entities': [(0, 3, 'SKILLS')]})
('Docker', {'entities': [(0, 6, 'SKILLS')]})
('Azure', {'entities': [(0, 5, 'SKILLS')]})
('Kubernetes', {'entities': [(0, 10, 'SKILLS')]})
TRAIN_DATA length 136


In [51]:
view_results(nlp(resume))

PERSON: Rupesh Ghimire rupeshghimire007@gmail.com
PERSON: Pashchimanchal Campus
ORG: IOE
PERSON: TU Bachelor
DATE: 2019
PERSON: rupeshghimire7
ORG: rupeshghimire7 INVOLVEMENTS NAAMI  ANAIS-Student
DATE: 2023 - Jun 2023
DATE: Jan 2023 - Feb 2023
PERSON: Django
ORG: MVC
PERSON: Templates
FAC: Rest Framework
GPE: Flask
DATE: May 2022- Jul 2022
ORG: BookSuggestor COURSEWORKSDBMS Project Leveraged RAW SQL
GPE: Django
PERSON: Django
ORG: UNDERGRADUATETelegram Chat Bot Data Structures & Algorithms Operating Systems Database Management System Software Engineering Artificial Intelligence Computer NetworksPersonal Project
PERSON: Machine Learning
PERSON: Numpy Matplotlib
PERSON: Seaborn  Scikit-Learn
PERSON: Keras JavaScript  ReactJS
ORG: ML Projects Personal Project Collection of Regression and Classification
CARDINAL: 10,000,000
DATE: 2030
PERSON: OnCampus
CARDINAL: 1st
PERSON: Participant
PERSON: Wildcard Golden Jubilee Scholarship Scheme Awarded
GPE: Embassy
GPE: India
GPE: Kathmandu
PERSON:

In [52]:
trainer(DATA= TRAIN_DATA)

Losses {'ner': 210.94944460677402}
Losses {'ner': 98.216194018919}
Losses {'ner': 80.61402582889423}
Losses {'ner': 63.73190387128852}
Losses {'ner': 50.56522207009766}
Losses {'ner': 27.954779526491997}
Losses {'ner': 21.760550867969865}
Losses {'ner': 12.21843597857575}
Losses {'ner': 6.516566119138227}
Losses {'ner': 11.500183392579999}


In [53]:
view_results(nlp(resume))

SKILLS: Rupesh Ghimire
SKILLS: +977-9868155925  
SKILLS: EDUCATION Pashchimanchal Campus
SKILLS: , TU Bachelor
SKILLS: in Computer Engineering
SKILLS: 2019-Present Fusemachines Micro-Degree
SKILLS: Artificial Intelligence
SKILLS: 2023-Present Fellowship Scholar
SKILLS: Github:// rupeshghimire7
SKILLS: rupesh-ghimire7
SKILLS: Medium:// rupeshghimire7
SKILLS: LeetCode:// rupeshghimire7 INVOLVEMENTS NAAMI  ANAIS-Student Ambassador Apr
SKILLS: 2023 - Jun
SKILLS: i-CES  Django Mentor
SKILLS: Jan
SKILLS: 2023 - Feb
SKILLS: Pokhara Made
SKILLS: Python and Django's MVC
SKILLS: ORM, Templates
SKILLS: Rest Framework
SKILLS: Coding Competition
SKILLS: 2023 Code
SKILLS: with Coffee
SKILLS: i-CES
SKILLS: 2022 PROJECTS LIVER CIRRHOSIS PREDICTION Fusemachines ML Final Project
SKILLS: 2023) Worked
SKILLS: multiclass classification
SKILLS: predict the stage of patients' liver.
SKILLS: Deployed on Flask
SKILLS: BCT Study Room Software Engineering Project
SKILLS: 2022- Jul
SKILLS: web-app
SKILLS: BookSug

**It seems like it is showing everything as skill. We will still train our model further on more data and labels and see how it performs. If the final result obtained isn't good, we will train our data directly on resume data instead of separate data**

### Train Tenure (for eg: Jan 2023 - Jan 2024)

In [54]:
date_phrases = [
    "January 2020 - Present",
    "2018 - 2022",
    "Dec 2015 - Mar 2018",
    "July 2021 - August 2023",
    "September 2019 - Nov 2020",
    "2020 - Present",
     "Oct 2018 - Apr 2020",
    "May 2016 - September 2019",
    "June 2019 - Dec 2021",
    "2005 - 2010",
    "Mar 2017 - Present",
    "July 2022",
    "Jan 2014 - Jun 2015",
    "August 2020 - Current",
    "Fall 2018 - Spring 2022",
    "Q3 2019 - Q1 2021",
    "Dec '19 - Feb '22",
    "3/2015 - 5/2018",
    "Apr 2008",
    "Summer 2016"
]

In [55]:
tenure_DATA = data_generator("TENURE",date_phrases, level)

for data in TENURE_DATA[:5]:
    print(data)
for data in TENURE_DATA[-5:]:
    print(data)
print("TENURE_DATA length", len(TENURE_DATA))

('January 2020 - Present', {'entities': [(0, 22, 'TENURE')]})
('July 2021 - August 2023', {'entities': [(0, 23, 'TENURE')]})
('June 2019 - Dec 2021', {'entities': [(0, 20, 'TENURE')]})
('2005 - 2010', {'entities': [(0, 11, 'TENURE')]})
("Dec '19 - Feb '22", {'entities': [(0, 17, 'TENURE')]})
('Q3 2019 - Q1 2021', {'entities': [(0, 17, 'TENURE')]})
('July 2022', {'entities': [(0, 9, 'TENURE')]})
('Fall 2018 - Spring 2022', {'entities': [(0, 23, 'TENURE')]})
('2018 - 2022', {'entities': [(0, 11, 'TENURE')]})
('September 2019 - Nov 2020', {'entities': [(0, 25, 'TENURE')]})
TENURE_DATA length 20


In [56]:
trainer(TENURE_DATA)

Losses {'ner': 77.65634635709011}
Losses {'ner': 56.204577534453584}
Losses {'ner': 53.5130533274563}
Losses {'ner': 70.05299830133299}
Losses {'ner': 65.22964933950408}
Losses {'ner': 49.729878971062135}
Losses {'ner': 19.28979122333476}
Losses {'ner': 16.963734334276523}
Losses {'ner': 6.6799235234229855}
Losses {'ner': 8.796177978858395}


In [57]:
view_results(nlp(resume))

TENURE: Rupesh Ghimire rupeshghimire007@gmail.com  
TENURE: TU Bachelor
TENURE: rupesh-ghimire7 Medium:// rupeshghimire7
TENURE: rupeshghimire7 INVOLVEMENTS NAAMI  ANAIS-Student Ambassador Apr 2023 - Jun 2023
TENURE: i-CES  Django Mentor Jan 2023 - Feb 2023  
TENURE: Python and Django's MVC
TENURE: ORM, Templates and Rest Framework
TENURE: Coding Competition (GCES) - 2023 Code with Coffee (i-CES) - 2022 PROJECTS LIVER CIRRHOSIS PREDICTION Fusemachines ML Final Project  (Feb 2023- Apr 2023) Worked on the multiclass classification problem
TENURE: predict the stage of patients' liver.
TENURE: Deployed on Flask
TENURE: BCT Study Room Software Engineering Project  (May 2022- Jul 2022
TENURE: BookSuggestor COURSEWORKSDBMS Project Leveraged RAW SQL
TENURE: UNDERGRADUATETelegram Chat Bot Data Structures & Algorithms Operating Systems Database Management System Software Engineering Artificial Intelligence Computer NetworksPersonal Project Used Telegram API for configuration and python for imple

In [58]:
IT_courses = [
    # Machine Learning
    "Machine Learning A-Z™: Hands-On Python & R In Data Science",
    "TensorFlow for Deep Learning",
    "Natural Language Processing in Python",
    "Reinforcement Learning Specialization",
    "Introduction to Deep Learning with PyTorch",
    
    # Python
    "Advanced Python Programming",
    "Flask Web Development",
    "Python GUI Programming with Tkinter",
    "Django for Beginners",
    "Python for Finance and Algorithmic Trading",
    
    # JavaScript
    "JavaScript - The Complete Guide 2022 (Beginner + Advanced)",
    "React - The Complete Guide (incl Hooks, React Router, Redux)",
    "Node.js - The Complete Guide",
    "Vue.js - The Complete Guide",
    "D3.js - Data Visualization with JavaScript",
    
    # Web Development
    "Advanced Web Developer Bootcamp",
    "GraphQL with React: The Complete Developers Guide",
    "MERN Stack - Build and Deploy Full-Stack React-Redux & Node.js",
    "Responsive Web Design Certification",
    "Progressive Web Apps (PWA) - The Complete Guide",
    
    # Graphics and Web Design
    "Adobe Illustrator CC - Essentials Training",
    "UI/UX Design - Adobe XD to WordPress Development",
    "Web Design for Beginners: Real World Coding in HTML & CSS",
    "Adobe Photoshop CC - Essentials Training Course",
    "Figma: Interactive Prototyping and Design (UI/UX)",
    
    # Web App Development
    "Full Stack Open 2021 - Deep Dive Into Modern Web Development",
    "Building Scalable APIs with GraphQL",
    "Serverless Functions and APIs using AWS Lambda",
    "Firebase - The Complete Guide",
    "Docker and Kubernetes: The Complete Guide",
    
    # AI
    "Artificial Intelligence: Business Strategies and Applications",
    "AI for Everyone",
    "Introduction to Deep Learning with Keras",
    "Building AI Powered Chatbots Without Programming",
    "AI and Machine Learning for Business",
    
    # Database Management
    "MongoDB - The Complete Developer's Guide",
    "SQL - MySQL for Data Analytics and Business Intelligence",
    "Database Design and MySQL",
    "Firebase Firestore for Web Development",
    "Neo4j Graph Databases with Cypher",
    
    # Data Analytics
    "Data Science and Machine Learning Bootcamp with R",
    "Google Analytics Certification: Become Certified & Earn More",
    "Power BI - A Complete Introduction",
    "Big Data Analytics with Spark",
    "Tableau 2020 A-Z: Hands-On Tableau Training For Data Science!",
    
    # Security and Ethical Hacking
    "Certified Information Systems Security Professional (CISSP)",
    "Ethical Hacking for Beginners",
    "CompTIA Security+ (SY0-601): Complete Course and Practice Exam",
    "Network Penetration Testing and Ethical Hacking",
    "Web Application Penetration Testing and Bug Bounty Course",
    
    # Cloud Computing
    "AWS Certified Cloud Practitioner - Full Course",
    "Microsoft Azure - Beginner's Guide + AZ-900",
    "Google Cloud Platform (GCP) - Cloud Architect",
    "Serverless Architecture on AWS",
    "DevOps on AWS: Learn CodeDeploy, AWS Elastic Beanstalk & More",
    
    # Additional Topics
    "Blockchain Basics: A Non-Technical Introduction",
    "Internet of Things (IoT) - The Complete Guide",
    "Cybersecurity Specialization",
    "Data Engineering, Big Data, and Machine Learning on GCP",
    "Complete Linux Training Course to Get Your Dream IT Job 2022",
    "5G NR (New Radio) - The Complete Course",
    "Introduction to Quantum Computing",
    "Python for Data Science",
    "Java Programming and Software Engineering",
    "Full Stack Web Development",
    "SQL Fundamentals",
    "Certified Ethical Hacker (CEH) Training",
    "AWS Certified Solutions Architect - Associate",
    "Machine Learning with Python",
    "Agile and Scrum Foundations",
    "Project Management Professional (PMP) Certification",
    "Cisco Certified Network Associate (CCNA)",
    "CompTIA A+, Security+, and Network+ Certifications",
    "Microsoft Certified: Azure, Windows, and Office Certifications",
    "Cybersecurity Essentials",
    "Data Science and Big Data Analytics",
    "DevOps Practices and Principles",
    "Network Security Fundamentals",
    "Deep Learning Specialization",
    "Google Cloud Platform (GCP) - Cloud Engineer",
    "HTML and CSS Fundamentals",
    "NoSQL Database Concepts",
    "Kanban for Software Development",
    "Certified Information Systems Security Professional (CISSP) Certification",
    "Project Management Professional (PMP) Certification",
    "Scrum Master Training",
    "Natural Language Processing (NLP)",
    "Wireless Networking Fundamentals",
    "Software-Defined Networking (SDN)",
    "Network Security and Firewall Management",
    "ITIL Foundation",
    "PRINCE2 Practitioner",
    "Certified Associate in Project Management (CAPM)",
    "Python for Data Science",
    "Java Programming and Software Engineering",
    "Full Stack Web Development",
    "SQL Fundamentals",
    "Certified Ethical Hacker (CEH) Training",
    "AWS Certified Solutions Architect - Associate",
    "Machine Learning with Python",
    "Agile and Scrum Foundations",
    "Project Management Professional (PMP) Certification",
    "Cisco Certified Network Associate (CCNA)",
    "CompTIA A+, Security+, and Network+ Certifications",
    "Microsoft Certified: Azure, Windows, and Office Certifications",
    "Cybersecurity Essentials",
    "Data Science and Big Data Analytics",
    "DevOps Practices and Principles",
    "Network Security Fundamentals",
    "Deep Learning Specialization",
    "Google Cloud Platform (GCP) - Cloud Engineer",
    "HTML and CSS Fundamentals",
    "NoSQL Database Concepts",
    "Kanban for Software Development",
    "Certified Information Systems Security Professional (CISSP) Certification",
    "Project Management Professional (PMP) Certification",
    "Scrum Master Training",
    "Natural Language Processing (NLP)",
    "Wireless Networking Fundamentals",
    "Software-Defined Networking (SDN)",
    "Network Security and Firewall Management",
    "ITIL Foundation",
    "PRINCE2 Practitioner",
    "Certified Associate in Project Management (CAPM)",
]

In [59]:
course_DATA = data_generator('COURSE',it_courses)

In [60]:
other_label_DATA = [
    ('123', {'entities': [(0, 3, 'CARDINAL')]}),
    ('January 1, 2022', {'entities': [(0, 15, 'DATE')]}),
    ('World Cup', {'entities': [(0, 9, 'EVENT')]}),
    ('Eiffel Tower', {'entities': [(0, 12, 'FAC')]}),
    ('New York', {'entities': [(0, 8, 'GPE')]}),
    ('English', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Constitution', {'entities': [(0, 11, 'LAW')]}),
    ('Statue of Liberty', {'entities': [(0, 17, 'LOC')]}),
    ('$100', {'entities': [(0, 4, 'MONEY')]}),
    ('American', {'entities': [(0, 8, 'NORP')]}),
    ('1st', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Microsoft', {'entities': [(0, 9, 'ORG')]}),
    ('25%', {'entities': [(0, 3, 'PERCENT')]}),
    ('John Doe', {'entities': [(0, 8, 'PERSON')]}),
    ('iPhone', {'entities': [(0, 6, 'PRODUCT')]}),
    ('5 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('3:30 PM', {'entities': [(0, 7, 'TIME')]}),
    ('Mona Lisa', {'entities': [(0, 9, 'WORK_OF_ART')]}),
    ('456', {'entities': [(0, 3, 'CARDINAL')]}),
    ('June 15, 2022', {'entities': [(0, 14, 'DATE')]}),
    ('Oscars', {'entities': [(0, 6, 'EVENT')]}),
    ('Pyramids of Giza', {'entities': [(0, 16, 'FAC')]}),
    ('London', {'entities': [(0, 6, 'GPE')]}),
    ('French', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Bill of Rights', {'entities': [(0, 15, 'LAW')]}),
    ('Grand Canyon', {'entities': [(0, 12, 'LOC')]}),
    ('$5000', {'entities': [(0, 5, 'MONEY')]}),
    ('Chinese', {'entities': [(0, 7, 'NORP')]}),
    ('2nd', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Google', {'entities': [(0, 6, 'ORG')]}),
    ('50%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Jane Smith', {'entities': [(0, 10, 'PERSON')]}),
    ('PlayStation', {'entities': [(0, 11, 'PRODUCT')]}),
    ('10 liters', {'entities': [(0, 9, 'QUANTITY')]}),
    ('12:45 AM', {'entities': [(0, 7, 'TIME')]}),
    ('The Starry Night', {'entities': [(0, 16, 'WORK_OF_ART')]}),
    ('789', {'entities': [(0, 3, 'CARDINAL')]}),
    ('August 10, 2023', {'entities': [(0, 15, 'DATE')]}),
    ('World Series', {'entities': [(0, 12, 'EVENT')]}),
    ('The Louvre Museum', {'entities': [(0, 17, 'FAC')]}),
    ('Tokyo', {'entities': [(0, 5, 'GPE')]}),
    ('Spanish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Civil Rights Act', {'entities': [(0, 16, 'LAW')]}),
    ('Mount Everest', {'entities': [(0, 13, 'LOC')]}),
    ('$75.50', {'entities': [(0, 6, 'MONEY')]}),
    ('Indian', {'entities': [(0, 6, 'NORP')]}),
    ('3rd', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Amazon', {'entities': [(0, 6, 'ORG')]}),
    ('75.5%', {'entities': [(0, 5, 'PERCENT')]}),
    ('Mark Johnson', {'entities': [(0, 12, 'PERSON')]}),
    ('MacBook Pro', {'entities': [(0, 11, 'PRODUCT')]}),
    ('5 miles', {'entities': [(0, 7, 'QUANTITY')]}),
    ('4:00 PM', {'entities': [(0, 7, 'TIME')]}),
    ('The Last Supper', {'entities': [(0, 15, 'WORK_OF_ART')]}),
    ('1000', {'entities': [(0, 3, 'CARDINAL')]}),
    ('December 5, 2024', {'entities': [(0, 15, 'DATE')]}),
    ('Super Bowl', {'entities': [(0, 10, 'EVENT')]}),
    ('The Colosseum', {'entities': [(0, 13, 'FAC')]}),
    ('Sydney', {'entities': [(0, 6, 'GPE')]}),
    ('German', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Universal Declaration of Human Rights', {'entities': [(0, 36, 'LAW')]}),
    ('Great Wall of China', {'entities': [(0, 18, 'LOC')]}),
    ('$50000', {'entities': [(0, 6, 'MONEY')]}),
    ('Australian', {'entities': [(0, 9, 'NORP')]}),
    ('4th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('NASA', {'entities': [(0, 4, 'ORG')]}),
    ('15%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Emily Watson', {'entities': [(0, 12, 'PERSON')]}),
    ('PlayStation 5', {'entities': [(0, 13, 'PRODUCT')]}),
    ('20 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('1:30 AM', {'entities': [(0, 7, 'TIME')]}),
    ('Starry Night by Vincent van Gogh', {'entities': [(0, 30, 'WORK_OF_ART')]}),
    ('2000', {'entities': [(0, 3, 'CARDINAL')]}),
    ('March 20, 2025', {'entities': [(0, 14, 'DATE')]}),
    ('Academy Awards', {'entities': [(0, 15, 'EVENT')]}),
    ('Machu Picchu', {'entities': [(0, 12, 'FAC')]}),
    ('Berlin', {'entities': [(0, 6, 'GPE')]}),
    ('Italian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Declaration of Independence', {'entities': [(0, 27, 'LAW')]}),
    ('Niagara Falls', {'entities': [(0, 14, 'LOC')]}),
    ('$100000', {'entities': [(0, 7, 'MONEY')]}),
    ('Japanese', {'entities': [(0, 8, 'NORP')]}),
    ('5th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('SpaceX', {'entities': [(0, 5, 'ORG')]}),
    ('30%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Michael Johnson', {'entities': [(0, 15, 'PERSON')]}),
    ('iPhone 12', {'entities': [(0, 8, 'PRODUCT')]}),
    ('15 miles', {'entities': [(0, 8, 'QUANTITY')]}),
    ('6:45 PM', {'entities': [(0, 7, 'TIME')]}),
    ('Mona Lisa by Leonardo da Vinci', {'entities': [(0, 28, 'WORK_OF_ART')]}),
    ('5000', {'entities': [(0, 3, 'CARDINAL')]}),
    ('October 8, 2023', {'entities': [(0, 15, 'DATE')]}),
    ('World Cup', {'entities': [(0, 9, 'EVENT')]}),
    ('The Great Wall of China', {'entities': [(0, 22, 'FAC')]}),
    ('Paris', {'entities': [(0, 5, 'GPE')]}),
    ('Russian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Human Rights Act', {'entities': [(0, 16, 'LAW')]}),
    ('Mount Kilimanjaro', {'entities': [(0, 17, 'LOC')]}),
    ('$80000', {'entities': [(0, 6, 'MONEY')]}),
    ('Brazilian', {'entities': [(0, 8, 'NORP')]}),
    ('6th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Microsoft', {'entities': [(0, 9, 'ORG')]}),
    ('85%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Emma Watson', {'entities': [(0, 11, 'PERSON')]}),
    ('Xbox Series X', {'entities': [(0, 13, 'PRODUCT')]}),
    ('25 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('2:15 PM', {'entities': [(0, 7, 'TIME')]}),
    ('The Scream by Edvard Munch', {'entities': [(0, 26, 'WORK_OF_ART')]}),
    ('123456', {'entities': [(0, 6, 'CARDINAL')]}),
    ('June 15, 2024', {'entities': [(0, 14, 'DATE')]}),
    ('Olympic Games', {'entities': [(0, 13, 'EVENT')]}),
    ('The Eiffel Tower', {'entities': [(0, 17, 'FAC')]}),
    ('Tokyo', {'entities': [(0, 5, 'GPE')]}),
    ('Chinese', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Bill of Rights', {'entities': [(0, 14, 'LAW')]}),
    ('Grand Canyon', {'entities': [(0, 12, 'LOC')]}),
    ('$120000', {'entities': [(0, 7, 'MONEY')]}),
    ('Mexican', {'entities': [(0, 7, 'NORP')]}),
    ('7th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Google', {'entities': [(0, 6, 'ORG')]}),
    ('10%', {'entities': [(0, 3, 'PERCENT')]}),
    ('John Smith', {'entities': [(0, 10, 'PERSON')]}),
    ('PlayStation 4', {'entities': [(0, 13, 'PRODUCT')]}),
    ('50 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('4:30 AM', {'entities': [(0, 7, 'TIME')]}),
    ('Guernica by Pablo Picasso', {'entities': [(0, 25, 'WORK_OF_ART')]}),
    ('67890', {'entities': [(0, 5, 'CARDINAL')]}),
    ('April 10, 2025', {'entities': [(0, 14, 'DATE')]}),
    ('FIFA World Cup', {'entities': [(0, 15, 'EVENT')]}),
    ('The Louvre Museum', {'entities': [(0, 17, 'FAC')]}),
    ('Rio de Janeiro', {'entities': [(0, 13, 'GPE')]}),
    ('Arabic', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Constitution of the United States', {'entities': [(0, 31, 'LAW')]}),
    ('Mount Everest', {'entities': [(0, 12, 'LOC')]}),
    ('$150000', {'entities': [(0, 7, 'MONEY')]}),
    ('Canadian', {'entities': [(0, 8, 'NORP')]}),
    ('8th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Amazon', {'entities': [(0, 6, 'ORG')]}),
    ('15%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Jane Doe', {'entities': [(0, 8, 'PERSON')]}),
    ('Nintendo Switch', {'entities': [(0, 16, 'PRODUCT')]}),
    ('75 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('9:45 PM', {'entities': [(0, 7, 'TIME')]}),
    ('The Persistence of Memory by Salvador Dali', {'entities': [(0, 40, 'WORK_OF_ART')]}),
    ('987654', {'entities': [(0, 6, 'CARDINAL')]}),
    ('September 5, 2026', {'entities': [(0, 16, 'DATE')]}),
    ('Super Bowl', {'entities': [(0, 10, 'EVENT')]}),
    ('The Colosseum', {'entities': [(0, 13, 'FAC')]}),
    ('Sydney', {'entities': [(0, 6, 'GPE')]}),
    ('Japanese', {'entities': [(0, 8, 'LANGUAGE')]}),
    ('European Convention on Human Rights', {'entities': [(0, 36, 'LAW')]}),
    ('Machu Picchu', {'entities': [(0, 12, 'LOC')]}),
    ('$200000', {'entities': [(0, 7, 'MONEY')]}),
    ('Australian', {'entities': [(0, 9, 'NORP')]}),
    ('10th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('Tesla', {'entities': [(0, 5, 'ORG')]}),
    ('20%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Michael Johnson', {'entities': [(0, 15, 'PERSON')]}),
    ('Sony PlayStation 5', {'entities': [(0, 20, 'PRODUCT')]}),
    ('100 kilograms', {'entities': [(0, 13, 'QUANTITY')]}),
    ('3:15 AM', {'entities': [(0, 7, 'TIME')]}),
    ('Starry Night by Vincent van Gogh', {'entities': [(0, 31, 'WORK_OF_ART')]}),
    ('876543', {'entities': [(0, 6, 'CARDINAL')]}),
    ('November 20, 2027', {'entities': [(0, 17, 'DATE')]}),
    ('World Series', {'entities': [(0, 12, 'EVENT')]}),
    ('Statue of Liberty', {'entities': [(0, 18, 'FAC')]}),
    ('London', {'entities': [(0, 6, 'GPE')]}),
    ('Korean', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Geneva Conventions', {'entities': [(0, 18, 'LAW')]}),
    ('Great Barrier Reef', {'entities': [(0, 18, 'LOC')]}),
    ('$250000', {'entities': [(0, 7, 'MONEY')]}),
    ('Brazilian', {'entities': [(0, 8, 'NORP')]}),
    ('12th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('Microsoft', {'entities': [(0, 9, 'ORG')]}),
    ('25%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Michelle Obama', {'entities': [(0, 14, 'PERSON')]}),
    ('Nintendo 3DS', {'entities': [(0, 14, 'PRODUCT')]}),
    ('150 kilograms', {'entities': [(0, 13, 'QUANTITY')]}),
    ('6:45 PM', {'entities': [(0, 7, 'TIME')]}),
    ('The Last Supper by Leonardo da Vinci', {'entities': [(0, 35, 'WORK_OF_ART')]}),
    
    # CARDINAL
    ('One', {'entities': [(0, 3, 'CARDINAL')]}),
    ('25', {'entities': [(0, 2, 'CARDINAL')]}),
    ('Three hundred', {'entities': [(0, 13, 'CARDINAL')]}),
    ('50', {'entities': [(0, 1, 'CARDINAL')]}),
    ('Seventy-two', {'entities': [(0, 12, 'CARDINAL')]}),
    ('15', {'entities': [(0, 1, 'CARDINAL')]}),
    ('A hundred and ten', {'entities': [(0, 17, 'CARDINAL')]}),
    ('5,000', {'entities': [(0, 4, 'CARDINAL')]}),
    ('Fifty-nine', {'entities': [(0, 10, 'CARDINAL')]}),
    ('2', {'entities': [(0, 0, 'CARDINAL')]}),
    ('Sixty-seven', {'entities': [(0, 12, 'CARDINAL')]}),
    ('Eighty', {'entities': [(0, 4, 'CARDINAL')]}),
    ('Nine', {'entities': [(0, 3, 'CARDINAL')]}),
    ('20', {'entities': [(0, 1, 'CARDINAL')]}),
    ('Two hundred and fifty', {'entities': [(0, 21, 'CARDINAL')]}),
    ('10,000', {'entities': [(0, 4, 'CARDINAL')]}),
    ('Six', {'entities': [(0, 2, 'CARDINAL')]}),
    ('Thirty', {'entities': [(0, 5, 'CARDINAL')]}),
    ('3.5', {'entities': [(0, 2, 'CARDINAL')]}),
    ('Eleven', {'entities': [(0, 5, 'CARDINAL')]}),
    ('Four', {'entities': [(0, 3, 'CARDINAL')]}),
    ('45', {'entities': [(0, 1, 'CARDINAL')]}),
    ('Eight', {'entities': [(0, 4, 'CARDINAL')]}),
    ('Seventeen', {'entities': [(0, 8, 'CARDINAL')]}),
    
    # DATE
    ('January 1, 2022', {'entities': [(0, 14, 'DATE')]}),
    ('07/15/1995', {'entities': [(0, 9, 'DATE')]}),
    ('22nd November 2010', {'entities': [(0, 17, 'DATE')]}),
    ('03-05-1988', {'entities': [(0, 9, 'DATE')]}),
    ('June 10th, 2023', {'entities': [(0, 15, 'DATE')]}),
    ('12/31/2000', {'entities': [(0, 9, 'DATE')]}),
    ('Aug 5, 1972', {'entities': [(0, 11, 'DATE')]}),
    ('May 15, 2015', {'entities': [(0, 12, 'DATE')]}),
    ('03/07/1986', {'entities': [(0, 9, 'DATE')]}),
    ('September 20, 1998', {'entities': [(0, 18, 'DATE')]}),
    ('25th December, 2025', {'entities': [(0, 20, 'DATE')]}),
    ('10/05/2010', {'entities': [(0, 9, 'DATE')]}),
    ('July 3, 2019', {'entities': [(0, 11, 'DATE')]}),
    ('04-18-2005', {'entities': [(0, 9, 'DATE')]}),
    ('June 30th, 2022', {'entities': [(0, 16, 'DATE')]}),
    ('09/08/1980', {'entities': [(0, 9, 'DATE')]}),
    ('22nd October, 2017', {'entities': [(0, 19, 'DATE')]}),
    ('11-25-1996', {'entities': [(0, 9, 'DATE')]}),
    ('March 12, 2008', {'entities': [(0, 13, 'DATE')]}),
    ('08/22/2014', {'entities': [(0, 9, 'DATE')]}),
    ('14th February, 1992', {'entities': [(0, 19, 'DATE')]}),
    ('05-10-2016', {'entities': [(0, 9, 'DATE')]}),
    ('April 5, 1982', {'entities': [(0, 12, 'DATE')]}),
    ('12-07-2011', {'entities': [(0, 9, 'DATE')]}),
    ('November 8th, 2007', {'entities': [(0, 17, 'DATE')]}),
    
    # EVENT
    ('Super Bowl', {'entities': [(0, 10, 'EVENT')]}),
    ('Woodstock Festival', {'entities': [(0, 18, 'EVENT')]}),
    ('Academic Conference', {'entities': [(0, 19, 'EVENT')]}),
    ('Tech Summit', {'entities': [(0, 11, 'EVENT')]}),
    ('Oscar Awards', {'entities': [(0, 12, 'EVENT')]}),
    ('TEDx Talk', {'entities': [(0, 9, 'EVENT')]}),
    ('Product Launch', {'entities': [(0, 14, 'EVENT')]}),
    ('Book Signing', {'entities': [(0, 12, 'EVENT')]}),
    ('FIFA World Cup', {'entities': [(0, 14, 'EVENT')]}),
    ('Art Exhibition', {'entities': [(0, 15, 'EVENT')]}),
    ('Halloween Party', {'entities': [(0, 15, 'EVENT')]}),
    ('Comic-Con', {'entities': [(0, 8, 'EVENT')]}),
    ('Election Day', {'entities': [(0, 12, 'EVENT')]}),
    ('Startup Pitch', {'entities': [(0, 13, 'EVENT')]}),
    ('Graduation Ceremony', {'entities': [(0, 19, 'EVENT')]}),
    ('International Film Festival', {'entities': [(0, 26, 'EVENT')]}),
    ('Live Concert', {'entities': [(0, 12, 'EVENT')]}),
    ('Fashion Show', {'entities': [(0, 12, 'EVENT')]}),
    ('Comic Book Convention', {'entities': [(0, 21, 'EVENT')]}),
    ('New Year\'s Eve Party', {'entities': [(0, 20, 'EVENT')]}),
    ('SpaceX Launch', {'entities': [(0, 13, 'EVENT')]}),
    ('Independence Day Parade', {'entities': [(0, 23, 'EVENT')]}),
    ('Tech Workshop', {'entities': [(0, 12, 'EVENT')]}),
    ('Music Festival', {'entities': [(0, 14, 'EVENT')]}),
    ('Scientific Symposium', {'entities': [(0, 20, 'EVENT')]}),
    
    # FAC
    ('Eiffel Tower', {'entities': [(0, 13, 'FAC')]}),
    ('White House', {'entities': [(0, 11, 'FAC')]}),
    ('Taj Mahal', {'entities': [(0, 9, 'FAC')]}),
    ('Louvre Museum', {'entities': [(0, 13, 'FAC')]}),
    ('Sydney Opera House', {'entities': [(0, 18, 'FAC')]}),
    ('Great Wall of China', {'entities': [(0, 18, 'FAC')]}),
    ('Statue of Liberty', {'entities': [(0, 17, 'FAC')]}),
    ('Colosseum', {'entities': [(0, 8, 'FAC')]}),
    ('Burj Khalifa', {'entities': [(0, 12, 'FAC')]}),
    ('Machu Picchu', {'entities': [(0, 12, 'FAC')]}),
    ('Grand Canyon', {'entities': [(0, 12, 'FAC')]}),
    ('Stonehenge', {'entities': [(0, 9, 'FAC')]}),
    ('The Vatican', {'entities': [(0, 11, 'FAC')]}),
    ('Mount Rushmore', {'entities': [(0, 14, 'FAC')]}),
    ('Golden Gate Bridge', {'entities': [(0, 18, 'FAC')]}),
    ('Sagrada Familia', {'entities': [(0, 15, 'FAC')]}),
    ('Petronas Towers', {'entities': [(0, 15, 'FAC')]}),
    ('Neuschwanstein Castle', {'entities': [(0, 21, 'FAC')]}),
    ('Red Square', {'entities': [(0, 10, 'FAC')]}),
    ('Disneyland', {'entities': [(0, 9, 'FAC')]}),
    ('The Acropolis', {'entities': [(0, 14, 'FAC')]}),
    ('The Shard', {'entities': [(0, 9, 'FAC')]}),
    ('Mount Everest Base Camp', {'entities': [(0, 24, 'FAC')]}),
    ('The Guggenheim Museum', {'entities': [(0, 21, 'FAC')]}),
    
    # GPE
    ('New York', {'entities': [(0, 8, 'GPE')]}),
    ('Paris', {'entities': [(0, 5, 'GPE')]}),
    ('Tokyo', {'entities': [(0, 5, 'GPE')]}),
    ('London', {'entities': [(0, 6, 'GPE')]}),
    ('Beijing', {'entities': [(0, 7, 'GPE')]}),
    ('Berlin', {'entities': [(0, 5, 'GPE')]}),
    ('Cairo', {'entities': [(0, 5, 'GPE')]}),
    ('Rio de Janeiro', {'entities': [(0, 13, 'GPE')]}),
    ('Sydney', {'entities': [(0, 6, 'GPE')]}),
    ('Mumbai', {'entities': [(0, 6, 'GPE')]}),
    ('Moscow', {'entities': [(0, 6, 'GPE')]}),
    ('Bangkok', {'entities': [(0, 7, 'GPE')]}),
    ('Toronto', {'entities': [(0, 7, 'GPE')]}),
    ('Dubai', {'entities': [(0, 5, 'GPE')]}),
    ('Johannesburg', {'entities': [(0, 12, 'GPE')]}),
    ('Rome', {'entities': [(0, 4, 'GPE')]}),
    ('Cape Town', {'entities': [(0, 9, 'GPE')]}),
    ('Lisbon', {'entities': [(0, 6, 'GPE')]}),
    ('Buenos Aires', {'entities': [(0, 12, 'GPE')]}),
    ('Hanoi', {'entities': [(0, 5, 'GPE')]}),
    ('Montreal', {'entities': [(0, 8, 'GPE')]}),
    ('Manila', {'entities': [(0, 6, 'GPE')]}),
    ('Athens', {'entities': [(0, 6, 'GPE')]}),
    ('Stockholm', {'entities': [(0, 9, 'GPE')]}),
    ('Kathmandu', {'entities': [(0, 9, 'GPE')]}),
    ('Pokhara', {'entities': [(0, 7, 'GPE')]}),
    ('Bhaktapur', {'entities': [(0, 9, 'GPE')]}),
    ('Lalitpur', {'entities': [(0, 8, 'GPE')]}),
    ('Biratnagar', {'entities': [(0, 10, 'GPE')]}),
    ('Butwal', {'entities': [(0, 6, 'GPE')]}),
    ('Dharan', {'entities': [(0, 6, 'GPE')]}),
    ('Dhangadhi', {'entities': [(0, 9, 'GPE')]}),
    ('Bharatpur', {'entities': [(0, 9, 'GPE')]}),
    ('Hetauda', {'entities': [(0, 7, 'GPE')]}),
    ('Janakpur', {'entities': [(0, 8, 'GPE')]}),
    ('Birendranagar', {'entities': [(0, 14, 'GPE')]}),
    ('Birgunj', {'entities': [(0, 7, 'GPE')]}),
    ('Nepalgunj', {'entities': [(0, 9, 'GPE')]}),
    ('Tansen', {'entities': [(0, 6, 'GPE')]}),
    ('Gorkha', {'entities': [(0, 6, 'GPE')]}),
    ('Ilam', {'entities': [(0, 4, 'GPE')]}),
    ('Jiri', {'entities': [(0, 4, 'GPE')]}),
    ('Dolakha', {'entities': [(0, 7, 'GPE')]}),
    ('Rolpa', {'entities': [(0, 5, 'GPE')]}),
    ('Solukhumbu', {'entities': [(0, 10, 'GPE')]}),
    ('Kanchanpur', {'entities': [(0, 10, 'GPE')]}),
    ('Rukum', {'entities': [(0, 5, 'GPE')]}),
    ('Bardiya', {'entities': [(0, 6, 'GPE')]}),
    ('Mustang', {'entities': [(0, 7, 'GPE')]}),
    
    # LANGUAGE
    ('NEPALI', {'entities':[(0,6,'LANGUAGE')]}),
    ('English', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Spanish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Chinese', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Hindi', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Arabic', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('French', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Portuguese', {'entities': [(0, 9, 'LANGUAGE')]}),
    ('Russian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Japanese', {'entities': [(0, 8, 'LANGUAGE')]}),
    ('German', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Italian', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Korean', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Dutch', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Swedish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Greek', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Hebrew', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Polish', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Turkish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Vietnamese', {'entities': [(0, 9, 'LANGUAGE')]}),
    ('Thai', {'entities': [(0, 4, 'LANGUAGE')]}),
    ('Swahili', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Finnish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Tagalog', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Norwegian', {'entities': [(0, 8, 'LANGUAGE')]}),
    ('English', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Spanish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Chinese', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Hindi', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Arabic', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('French', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Portuguese', {'entities': [(0, 9, 'LANGUAGE')]}),
    ('Russian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Japanese', {'entities': [(0, 8, 'LANGUAGE')]}),
    ('German', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Italian', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Korean', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Dutch', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Swedish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Greek', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Hebrew', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Polish', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Turkish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Vietnamese', {'entities': [(0, 9, 'LANGUAGE')]}),
    ('Thai', {'entities': [(0, 4, 'LANGUAGE')]}),
    ('Swahili', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Finnish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Tagalog', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Norwegian', {'entities': [(0, 8, 'LANGUAGE')]}),
    
    # LAWS
    ('Constitution', {'entities': [(0, 12, 'LAW')]}),
    ('Legal Code', {'entities': [(0, 10, 'LAW')]}),
    ('Criminal Law', {'entities': [(0, 12, 'LAW')]}),
    ('Patent Act', {'entities': [(0, 10, 'LAW')]}),
    ('Family Law', {'entities': [(0, 10, 'LAW')]}),
    ('Labor Law', {'entities': [(0, 9, 'LAW')]}),
    ('Contract Law', {'entities': [(0, 12, 'LAW')]}),
    ('Environmental Law', {'entities': [(0, 16, 'LAW')]}),
    ('Intellectual Property Law', {'entities': [(0, 24, 'LAW')]}),
    ('Tort Law', {'entities': [(0, 8, 'LAW')]}),
    ('Corporate Law', {'entities': [(0, 12, 'LAW')]}),
    ('Human Rights Law', {'entities': [(0, 15, 'LAW')]}),
    ('Constitutional Law', {'entities': [(0, 17, 'LAW')]}),
    ('Bankruptcy Law', {'entities': [(0, 14, 'LAW')]}),
    ('Criminal Code', {'entities': [(0, 13, 'LAW')]}),
    ('Civil Law', {'entities': [(0, 9, 'LAW')]}),
    ('Tax Law', {'entities': [(0, 8, 'LAW')]}),
    ('Privacy Law', {'entities': [(0, 11, 'LAW')]}),
    ('Antitrust Law', {'entities': [(0, 13, 'LAW')]}),
    ('International Law', {'entities': [(0, 17, 'LAW')]}),
    ('Immigration Law', {'entities': [(0, 14, 'LAW')]}),
    ('Criminal Procedure', {'entities': [(0, 18, 'LAW')]}),
    ('Conveyancing Act', {'entities': [(0, 16, 'LAW')]}),
    ('Consumer Protection Law', {'entities': [(0, 22, 'LAW')]}),
    
    # LOC
    ('Mount Everest', {'entities': [(0, 12, 'LOC')]}),
    ('Great Barrier Reef', {'entities': [(0, 18, 'LOC')]}),
    ('Amazon Rainforest', {'entities': [(0, 17, 'LOC')]}),
    ('Sahara Desert', {'entities': [(0, 13, 'LOC')]}),
    ('Grand Canyon', {'entities': [(0, 12, 'LOC')]}),
    ('Eiffel Tower', {'entities': [(0, 12, 'LOC')]}),
    ('Machu Picchu', {'entities': [(0, 12, 'LOC')]}),
    ('Taj Mahal', {'entities': [(0, 9, 'LOC')]}),
    ('Great Wall of China', {'entities': [(0, 19, 'LOC')]}),
    ('Sydney Opera House', {'entities': [(0, 18, 'LOC')]}),
    ('Pyramids of Giza', {'entities': [(0, 15, 'LOC')]}),
    ('Yellowstone National Park', {'entities': [(0, 25, 'LOC')]}),
    ('Venice', {'entities': [(0, 6, 'LOC')]}),
    ('Mount Fuji', {'entities': [(0, 10, 'LOC')]}),
    ('Victoria Falls', {'entities': [(0, 14, 'LOC')]}),
    ('Galápagos Islands', {'entities': [(0, 17, 'LOC')]}),
    ('Santorini', {'entities': [(0, 9, 'LOC')]}),
    ('Banff National Park', {'entities': [(0, 18, 'LOC')]}),
    ('Serengenti National Park', {'entities': [(0, 24, 'LOC')]}),
    ('Mesa Verde', {'entities': [(0, 10, 'LOC')]}),
    ('Niagara Falls', {'entities': [(0, 13, 'LOC')]}),
    ('Iguazu Falls', {'entities': [(0, 12, 'LOC')]}),
    ('Victoria Harbour', {'entities': [(0, 16, 'LOC')]}),
    ('Aoraki / Mount Cook', {'entities': [(0, 20, 'LOC')]}),
    
    # MONEY
    ('$1,000', {'entities': [(0, 6, 'MONEY')]}),
    ('$100,000', {'entities': [(0, 8, 'MONEY')]}),
    ('$1,000,000', {'entities': [(0, 10, 'MONEY')]}),
    ('$10,000,000', {'entities': [(0, 11, 'MONEY')]}),
    ('$100,000,000', {'entities': [(0, 12, 'MONEY')]}),
    ('$1', {'entities': [(0, 2, 'MONEY')]}),
    ('$100', {'entities': [(0, 4, 'MONEY')]}),
    ('$1,000', {'entities': [(0, 6, 'MONEY')]}),
    ('$10,000', {'entities': [(0, 7, 'MONEY')]}),
    ('$100,000', {'entities': [(0, 8, 'MONEY')]}),
    ('$1,000,000', {'entities': [(0, 10, 'MONEY')]}),
    ('$10', {'entities': [(0, 3, 'MONEY')]}),
    ('$100', {'entities': [(0, 4, 'MONEY')]}),
    ('$1,000', {'entities': [(0, 6, 'MONEY')]}),
    ('$10,000', {'entities': [(0, 7, 'MONEY')]}),
    ('$100,000', {'entities': [(0, 8, 'MONEY')]}),
    ('$1,000,000', {'entities': [(0, 10, 'MONEY')]}),
    ('€1,000', {'entities': [(0, 6, 'MONEY')]}),
    ('€100,000', {'entities': [(0, 8, 'MONEY')]}),
    ('€1,000,000', {'entities': [(0, 10, 'MONEY')]}),
    ('£1,000', {'entities': [(0, 6, 'MONEY')]}),
    ('£100,000', {'entities': [(0, 8, 'MONEY')]}),
    ('£1,000,000', {'entities': [(0, 10, 'MONEY')]}),
    
    # NORP
    ('American', {'entities': [(0, 8, 'NORP')]}),
    ('British', {'entities': [(0, 7, 'NORP')]}),
    ('Chinese', {'entities': [(0, 7, 'NORP')]}),
    ('Indian', {'entities': [(0, 6, 'NORP')]}),
    ('Russian', {'entities': [(0, 7, 'NORP')]}),
    ('Japanese', {'entities': [(0, 8, 'NORP')]}),
    ('French', {'entities': [(0, 6, 'NORP')]}),
    ('German', {'entities': [(0, 6, 'NORP')]}),
    ('Italian', {'entities': [(0, 7, 'NORP')]}),
    ('Brazilian', {'entities': [(0, 9, 'NORP')]}),
    ('Mexican', {'entities': [(0, 7, 'NORP')]}),
    ('Australian', {'entities': [(0, 9, 'NORP')]}),
    ('Canadian', {'entities': [(0, 7, 'NORP')]}),
    ('South Korean', {'entities': [(0, 12, 'NORP')]}),
    ('Russian', {'entities': [(0, 7, 'NORP')]}),
    ('Mexican', {'entities': [(0, 7, 'NORP')]}),
    ('Chinese', {'entities': [(0, 7, 'NORP')]}),
    ('Indian', {'entities': [(0, 6, 'NORP')]}),
    ('Japanese', {'entities': [(0, 8, 'NORP')]}),
    ('Brazilian', {'entities': [(0, 9, 'NORP')]}),
    ('German', {'entities': [(0, 6, 'NORP')]}),
    ('Italian', {'entities': [(0, 7, 'NORP')]}),
    ('Australian', {'entities': [(0, 9, 'NORP')]}),
    ('Canadian', {'entities': [(0, 7, 'NORP')]}),
    ('African', {'entities': [(0, 7, 'NORP')]}),
    ('European', {'entities': [(0, 8, 'NORP')]}),
    ('Asian', {'entities': [(0, 5, 'NORP')]}),
    ('Latin American', {'entities': [(0, 14, 'NORP')]}),
    ('Middle Eastern', {'entities': [(0, 14, 'NORP')]}),
    ('Hindu', {'entities': [(0, 5, 'NORP')]}),
    ('Muslim', {'entities': [(0, 6, 'NORP')]}),
    ('Buddhist', {'entities': [(0, 8, 'NORP')]}),
    ('Christian', {'entities': [(0, 9, 'NORP')]}),
    ('Jewish', {'entities': [(0, 6, 'NORP')]}),
    ('Republican', {'entities': [(0, 10, 'NORP')]}),
    ('Democrat', {'entities': [(0, 7, 'NORP')]}),
    ('Communist', {'entities': [(0, 9, 'NORP')]}),
    ('Socialist', {'entities': [(0, 8, 'NORP')]}),
    ('Capitalist', {'entities': [(0, 10, 'NORP')]}),
    ('Conservative', {'entities': [(0, 12, 'NORP')]}),
    ('Liberal', {'entities': [(0, 6, 'NORP')]}),
    ('Nationalist', {'entities': [(0, 10, 'NORP')]}),
    ('Globalist', {'entities': [(0, 8, 'NORP')]}),
    ('Hindu', {'entities': [(0, 5, 'NORP')]}),
    ('Buddhist', {'entities': [(0, 8, 'NORP')]}),
    ('Christian', {'entities': [(0, 9, 'NORP')]}),
    ('Jewish', {'entities': [(0, 6, 'NORP')]}),
    
    # ORDINAL
    ('1st', {'entities': [(0, 3, 'ORDINAL')]}),
    ('2nd', {'entities': [(0, 3, 'ORDINAL')]}),
    ('3rd', {'entities': [(0, 3, 'ORDINAL')]}),
    ('4th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('5th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('10th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('15th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('20th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('25th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('30th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('50th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('75th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('100th', {'entities': [(0, 5, 'ORDINAL')]}),
    ('first', {'entities': [(0, 5, 'ORDINAL')]}),
    ('second', {'entities': [(0, 6, 'ORDINAL')]}),
    ('third', {'entities': [(0, 5, 'ORDINAL')]}),
    ('fourth', {'entities': [(0, 6, 'ORDINAL')]}),
    ('fifth', {'entities': [(0, 5, 'ORDINAL')]}),
    ('sixth', {'entities': [(0, 5, 'ORDINAL')]}),
    ('seventh', {'entities': [(0, 7, 'ORDINAL')]}),
    ('eighth', {'entities': [(0, 6, 'ORDINAL')]}),
    ('ninth', {'entities': [(0, 5, 'ORDINAL')]}),
    ('tenth', {'entities': [(0, 5, 'ORDINAL')]}),
    ('eleventh', {'entities': [(0, 8, 'ORDINAL')]}),
    ('twelfth', {'entities': [(0, 7, 'ORDINAL')]}),
    ('thirteenth', {'entities': [(0, 9, 'ORDINAL')]}),
    ('fourteenth', {'entities': [(0, 10, 'ORDINAL')]}),
    ('fifteenth', {'entities': [(0, 9, 'ORDINAL')]}),
    ('sixteenth', {'entities': [(0, 9, 'ORDINAL')]}),
    ('seventeenth', {'entities': [(0, 11, 'ORDINAL')]}),
    ('eighteenth', {'entities': [(0, 10, 'ORDINAL')]}),
    ('nineteenth', {'entities': [(0, 10, 'ORDINAL')]}),
    ('twentieth', {'entities': [(0, 8, 'ORDINAL')]}),
    ('twenty-first', {'entities': [(0, 12, 'ORDINAL')]}),
    ('twenty-second', {'entities': [(0, 13, 'ORDINAL')]}),
    ('thirtieth', {'entities': [(0, 8, 'ORDINAL')]}),
    ('fortieth', {'entities': [(0, 8, 'ORDINAL')]}),
    
    # ORG
    ('OpenAI', {'entities': [(0, 6, 'ORG')]}),
    ('Google', {'entities': [(0, 6, 'ORG')]}),
    ('Microsoft', {'entities': [(0, 9, 'ORG')]}),
    ('Facebook', {'entities': [(0, 8, 'ORG')]}),
    ('Amazon', {'entities': [(0, 6, 'ORG')]}),
    ('Apple Inc.', {'entities': [(0, 10, 'ORG')]}),
    ('Tesla', {'entities': [(0, 5, 'ORG')]}),
    ('SpaceX', {'entities': [(0, 5, 'ORG')]}),
    ('IBM', {'entities': [(0, 3, 'ORG')]}),
    ('Intel', {'entities': [(0, 5, 'ORG')]}),
    ('Adobe', {'entities': [(0, 5, 'ORG')]}),
    ('Netflix', {'entities': [(0, 7, 'ORG')]}),
    ('Uber', {'entities': [(0, 4, 'ORG')]}),
    ('Airbnb', {'entities': [(0, 6, 'ORG')]}),
    ('Twitter', {'entities': [(0, 7, 'ORG')]}),
    ('LinkedIn', {'entities': [(0, 8, 'ORG')]}),
    ('Reddit', {'entities': [(0, 6, 'ORG')]}),
    ('GitHub', {'entities': [(0, 6, 'ORG')]}),
    ('Stack Overflow', {'entities': [(0, 14, 'ORG')]}),
    ('Docker', {'entities': [(0, 6, 'ORG')]}),
    ('Harvard University', {'entities': [(0, 18, 'ORG')]}),
    ('Stanford University', {'entities': [(0, 19, 'ORG')]}),
    ('Massachusetts Institute of Technology', {'entities': [(0, 38, 'ORG')]}),
    ('University of Oxford', {'entities': [(0, 20, 'ORG')]}),
    ('California Institute of Technology', {'entities': [(0, 35, 'ORG')]}),
    ('Princeton University', {'entities': [(0, 19, 'ORG')]}),
    ('Yale University', {'entities': [(0, 15, 'ORG')]}),
    ('University of Cambridge', {'entities': [(0, 24, 'ORG')]}),
    ('ETH Zurich', {'entities': [(0, 9, 'ORG')]}),
    ('Carnegie Mellon University', {'entities': [(0, 26, 'ORG')]}),
    ('University of Chicago', {'entities': [(0, 21, 'ORG')]}),
    ('University of Tokyo', {'entities': [(0, 19, 'ORG')]}),
    ('Indian Institutes of Technology', {'entities': [(0, 29, 'ORG')]}),
    ('Stanford Online High School', {'entities': [(0, 26, 'ORG')]}),
    ('Khan Academy', {'entities': [(0, 12, 'ORG')]}),
    ('Coursera', {'entities': [(0, 8, 'ORG')]}),
    ('edX', {'entities': [(0, 3, 'ORG')]}),
    ('Udacity', {'entities': [(0, 7, 'ORG')]}),
    ('MIT OpenCourseWare', {'entities': [(0, 17, 'ORG')]}),
    ('Codecademy', {'entities': [(0, 10, 'ORG')]}),
    ('University of California, Berkeley', {'entities': [(0, 31, 'ORG')]}),
    ('London Business School', {'entities': [(0, 21, 'ORG')]}),
    ('Columbia Business School', {'entities': [(0, 23, 'ORG')]}),
    ('INSEAD', {'entities': [(0, 6, 'ORG')]}),
    ('University of Maryland, College Park', {'entities': [(0, 35, 'ORG')]}),
    ('Purdue University', {'entities': [(0, 18, 'ORG')]}),
    ('Indian Institutes of Technology', {'entities': [(0, 30, 'ORG')]}),
    ('Indian Institute of Science', {'entities': [(0, 26, 'ORG')]}),
    ('National Institute of Fashion Technology', {'entities': [(0, 42, 'ORG')]}),
    ('Jawaharlal Nehru University', {'entities': [(0, 27, 'ORG')]}),
    ('Delhi Technological University', {'entities': [(0, 29, 'ORG')]}),
    ('National Institute of Technology', {'entities': [(0, 29, 'ORG')]}),
    ('All India Institute of Medical Sciences', {'entities': [(0, 39, 'ORG')]}),
    ('Amity University', {'entities': [(0, 15, 'ORG')]}),
    ('BITS Pilani', {'entities': [(0, 11, 'ORG')]}),
    ('IIM Ahmedabad', {'entities': [(0, 13, 'ORG')]}),
    ('IIT Bombay', {'entities': [(0, 10, 'ORG')]}),
    ('IISc Bangalore', {'entities': [(0, 14, 'ORG')]}),
    ('AIIMS Delhi', {'entities': [(0, 10, 'ORG')]}),
    ('Manipal Institute of Technology', {'entities': [(0, 29, 'ORG')]}),
    ('Indian Statistical Institute', {'entities': [(0, 26, 'ORG')]}),
    ('St. Xavier\'s College', {'entities': [(0, 18, 'ORG')]}),
    ('Loyola College', {'entities': [(0, 13, 'ORG')]}),
    ('Presidency College', {'entities': [(0, 17, 'ORG')]}),
    ('DPS RK Puram', {'entities': [(0, 13, 'ORG')]}),
    ('La Martiniere College', {'entities': [(0, 20, 'ORG')]}),
    ('St. Stephen\'s College', {'entities': [(0, 20, 'ORG')]}),
    ('Bishop Cotton School', {'entities': [(0, 19, 'ORG')]}),
    ('Mayo College', {'entities': [(0, 12, 'ORG')]}),
    ('Sherwood College', {'entities': [(0, 17, 'ORG')]}),
    ('Modern School', {'entities': [(0, 13, 'ORG')]}),
    ('Doon School', {'entities': [(0, 11, 'ORG')]}),
    ('Welham Boys\' School', {'entities': [(0, 18, 'ORG')]}),
    ('Lawrence School, Sanawar', {'entities': [(0, 24, 'ORG')]}),
    ('Birla Institute of Technology and Science', {'entities': [(0, 42, 'ORG')]}),
    ('National Law School of India University', {'entities': [(0, 43, 'ORG')]}),
    ('Indian School of Business', {'entities': [(0, 25, 'ORG')]}),
    ('Indian Statistical Institute', {'entities': [(0, 26, 'ORG')]}),
    ('Film and Television Institute of India', {'entities': [(0, 38, 'ORG')]}),
    ('Indian Institute of Foreign Trade', {'entities': [(0, 31, 'ORG')]}),
    ('NIFT Delhi', {'entities': [(0, 10, 'ORG')]}),
    ('Symbiosis International University', {'entities': [(0, 34, 'ORG')]}),
    ('Jadavpur University', {'entities': [(0, 18, 'ORG')]}),
    ('University of Pune', {'entities': [(0, 17, 'ORG')]}),
    ('Mumbai University', {'entities': [(0, 16, 'ORG')]}),
    ('Osmania University', {'entities': [(0, 17, 'ORG')]}),
    ('Anna University', {'entities': [(0, 14, 'ORG')]}),
    ('Jawaharlal Nehru Technological University', {'entities': [(0, 44, 'ORG')]}),
    ('Bharathiar University', {'entities': [(0, 20, 'ORG')]}),
    ('Bangalore University', {'entities': [(0, 18, 'ORG')]}),
    ('Calicut University', {'entities': [(0, 16, 'ORG')]}),
    ('University of Madras', {'entities': [(0, 19, 'ORG')]}),
    ('Christ University', {'entities': [(0, 15, 'ORG')]}),
    ('Banaras Hindu University', {'entities': [(0, 24, 'ORG')]}),
    ('Aligarh Muslim University', {'entities': [(0, 23, 'ORG')]}),
    ('Jamia Millia Islamia', {'entities': [(0, 20, 'ORG')]}),
    ('Panjab University', {'entities': [(0, 16, 'ORG')]}),
    ('University of Calcutta', {'entities': [(0, 21, 'ORG')]}),
    ('IISER Pune', {'entities': [(0, 10, 'ORG')]}),
    ('IISER Kolkata', {'entities': [(0, 12, 'ORG')]}),
    ('IISER Thiruvananthapuram', {'entities': [(0, 21, 'ORG')]}),
    ('IISER Tirupati', {'entities': [(0, 13, 'ORG')]}),
    ('Delhi Public School', {'entities': [(0, 20, 'ORG')]}),
    ('La Martiniere Kolkata', {'entities': [(0, 23, 'ORG')]}),
    ('Modern School Delhi', {'entities': [(0, 19, 'ORG')]}),
    ('Bishop Cotton Boys\' School', {'entities': [(0, 26, 'ORG')]}),
    ('Mayo College', {'entities': [(0, 12, 'ORG')]}),
    ('St. Xavier\'s Collegiate School', {'entities': [(0, 29, 'ORG')]}),
    ('The Doon School', {'entities': [(0, 17, 'ORG')]}),
    ('Welham Girls\' School', {'entities': [(0, 19, 'ORG')]}),
    ('Sherwood College', {'entities': [(0, 17, 'ORG')]}),
    ('Montfort School', {'entities': [(0, 14, 'ORG')]}),
    ('Birla Vidya Niketan', {'entities': [(0, 18, 'ORG')]}),
    ('Sri Aurobindo International Centre of Education', {'entities': [(0, 47, 'ORG')]}),
    ('The Scindia School', {'entities': [(0, 18, 'ORG')]}),
    ('DPS RK Puram', {'entities': [(0, 13, 'ORG')]}),
    ('DPS Vasant Kunj', {'entities': [(0, 15, 'ORG')]}),
    ('La Martiniere Lucknow', {'entities': [(0, 23, 'ORG')]}),
    ('Modern School Barakhamba Road', {'entities': [(0, 27, 'ORG')]}),
    ('Bishop Cotton Girls\' School', {'entities': [(0, 28, 'ORG')]}),
    ('Mayo College Girls School', {'entities': [(0, 27, 'ORG')]}),
    ('St. Xavier\'s High School', {'entities': [(0, 24, 'ORG')]}),
    ('The Doon School', {'entities': [(0, 17, 'ORG')]}),
    ('Welham Boys\' School', {'entities': [(0, 18, 'ORG')]}),
    ('Sherwood College', {'entities': [(0, 17, 'ORG')]}),
    ('Montfort School', {'entities': [(0, 14, 'ORG')]}),
    ('Birla Vidya Niketan', {'entities': [(0, 18, 'ORG')]}),
    ('Sri Aurobindo International Centre of Education', {'entities': [(0, 47, 'ORG')]}),
    ('The Scindia School', {'entities': [(0, 18, 'ORG')]}),
    ('DPS RK Puram', {'entities': [(0, 13, 'ORG')]}),
    ('DPS Vasant Kunj', {'entities': [(0, 15, 'ORG')]}),
    ('La Martiniere Lucknow', {'entities': [(0, 23, 'ORG')]}),
    ('Modern School Barakhamba Road', {'entities': [(0, 27, 'ORG')]}),
    ('Bishop Cotton Girls\' School', {'entities': [(0, 28, 'ORG')]}),
    ('Mayo College Girls School', {'entities': [(0, 27, 'ORG')]}),
    ('St. Xavier\'s High School', {'entities': [(0, 24, 'ORG')]}),
    ('National Academy of Sciences', {'entities': [(0, 25, 'ORG')]}),
    ('American Academy of Arts and Sciences', {'entities': [(0, 36, 'ORG')]}),
    ('Royal Academy of Arts', {'entities': [(0, 20, 'ORG')]}),
    ('Indian Academy of Sciences', {'entities': [(0, 25, 'ORG')]}),
    ('National Academy of Engineering', {'entities': [(0, 30, 'ORG')]}),
    ('National Academy of Medicine', {'entities': [(0, 25, 'ORG')]}),
    ('Academy of Motion Picture Arts and Sciences', {'entities': [(0, 45, 'ORG')]}),
    ('Indian National Science Academy', {'entities': [(0, 30, 'ORG')]}),
    ('National Academy of Design', {'entities': [(0, 24, 'ORG')]}),
    ('Academy of Sciences for the Developing World', {'entities': [(0, 42, 'ORG')]}),
    ('American Academy in Rome', {'entities': [(0, 22, 'ORG')]}),
    ('United States Military Academy', {'entities': [(0, 29, 'ORG')]}),
    ('Russian Academy of Sciences', {'entities': [(0, 25, 'ORG')]}),
    ('Academy of Natural Sciences of Drexel University', {'entities': [(0, 49, 'ORG')]}),
    ('National Academy of Public Administration', {'entities': [(0, 37, 'ORG')]}),
    ('National Academy of Recording Arts and Sciences', {'entities': [(0, 47, 'ORG')]}),
    ('American Academy of Pediatrics', {'entities': [(0, 28, 'ORG')]}),
    ('International Academy of Astronautics', {'entities': [(0, 35, 'ORG')]}),
    ('Academy of Country Music', {'entities': [(0, 23, 'ORG')]}),
    ('Juilliard School', {'entities': [(0, 15, 'ORG')]}),
    ('School of Visual Arts', {'entities': [(0, 20, 'ORG')]}),
    ('Oxford University Business School', {'entities': [(0, 31, 'ORG')]}),
    ('Tuck School of Business at Dartmouth', {'entities': [(0, 34, 'ORG')]}),
    ('Cambridge College', {'entities': [(0, 16, 'ORG')]}),
    ('New York Film Academy', {'entities': [(0, 21, 'ORG')]}),
    ('Berklee College of Music', {'entities': [(0, 24, 'ORG')]}),
    ('London College of Fashion', {'entities': [(0, 24, 'ORG')]}),
    ('School of the Art Institute of Chicago', {'entities': [(0, 37, 'ORG')]}),
    ('Royal College of Surgeons in Ireland', {'entities': [(0, 35, 'ORG')]}),
    ('XYZ Technologies', {'entities': [(0, 15, 'ORG')]}),
    ('Innovate Solutions Inc.', {'entities': [(0, 23, 'ORG')]}),
    ('Global Innovations Co.', {'entities': [(0, 22, 'ORG')]}),
    ('Apex Systems Group', {'entities': [(0, 18, 'ORG')]}),
    ('Pinnacle Solutions Ltd.', {'entities': [(0, 22, 'ORG')]}),
    ('Tech Dynamics Corporation', {'entities': [(0, 26, 'ORG')]}),
    ('Infinite Innovations Pvt. Ltd.', {'entities': [(0, 29, 'ORG')]}),
    ('Synergy Solutions LLC', {'entities': [(0, 21, 'ORG')]}),
    ('Vista Enterprises Ltd.', {'entities': [(0, 21, 'ORG')]}),
    ('Strategic Solutions International', {'entities': [(0, 32, 'ORG')]}),
    ('Quantum Innovations Co.', {'entities': [(0, 24, 'ORG')]}),
    ('InnoSys Tech Solutions', {'entities': [(0, 23, 'ORG')]}),
    ('Horizon Systems Pvt. Ltd.', {'entities': [(0, 25, 'ORG')]}),
    ('Dynamic Solutions Inc.', {'entities': [(0, 23, 'ORG')]}),
    ('TechCore Innovations', {'entities': [(0, 20, 'ORG')]}),
    ('Accel Solutions Ltd.', {'entities': [(0, 21, 'ORG')]}),
    ('Synergetic Systems Pvt. Ltd.', {'entities': [(0, 28, 'ORG')]}),
    ('NexGen Dynamics Corporation', {'entities': [(0, 29, 'ORG')]}),
    ('OmniTech Solutions International', {'entities': [(0, 33, 'ORG')]}),
    ('Strive Systems Co.', {'entities': [(0, 18, 'ORG')]}),
    
    
    
    # PERCENT
     ('10%', {'entities': [(0, 3, 'PERCENT')]}),
    ('25%', {'entities': [(0, 3, 'PERCENT')]}),
    ('50%', {'entities': [(0, 3, 'PERCENT')]}),
    ('75%', {'entities': [(0, 3, 'PERCENT')]}),
    ('90%', {'entities': [(0, 3, 'PERCENT')]}),
    ('15%', {'entities': [(0, 3, 'PERCENT')]}),
    ('30%', {'entities': [(0, 3, 'PERCENT')]}),
    ('60%', {'entities': [(0, 3, 'PERCENT')]}),
    ('80%', {'entities': [(0, 3, 'PERCENT')]}),
    ('95%', {'entities': [(0, 3, 'PERCENT')]}),
    ('20%', {'entities': [(0, 3, 'PERCENT')]}),
    ('40%', {'entities': [(0, 3, 'PERCENT')]}),
    ('70%', {'entities': [(0, 3, 'PERCENT')]}),
    ('85%', {'entities': [(0, 3, 'PERCENT')]}),
    ('99%', {'entities': [(0, 3, 'PERCENT')]}),
    ('5%', {'entities': [(0, 2, 'PERCENT')]}),
    ('45%', {'entities': [(0, 3, 'PERCENT')]}),
    ('65%', {'entities': [(0, 3, 'PERCENT')]}),
    ('88%', {'entities': [(0, 3, 'PERCENT')]}),
    ('100%', {'entities': [(0, 4, 'PERCENT')]}),
    
    # PERSON
    ('John Smith', {'entities': [(0, 10, 'PERSON')]}),
    ('Mary Johnson', {'entities': [(0, 12, 'PERSON')]}),
    ('Muhammad Ali', {'entities': [(0, 13, 'PERSON')]}),
    ('David Cohen', {'entities': [(0, 11, 'PERSON')]}),
    ('Sara Khan', {'entities': [(0, 9, 'PERSON')]}),
    ('Hannah Patel', {'entities': [(0, 11, 'PERSON')]}),
    ('Ravi Verma', {'entities': [(0, 10, 'PERSON')]}),
    ('Aisha Siddiqui', {'entities': [(0, 14, 'PERSON')]}),
    ('Isaac Lee', {'entities': [(0, 9, 'PERSON')]}),
    ('Leah Goldberg', {'entities': [(0, 12, 'PERSON')]}),
    ('Daniel Wang', {'entities': [(0, 11, 'PERSON')]}),
    ('Yuki Tanaka', {'entities': [(0, 11, 'PERSON')]}),
    ('Priya Desai', {'entities': [(0, 11, 'PERSON')]}),
    ('Kenji Yamamoto', {'entities': [(0, 13, 'PERSON')]}),
    ('Fatima Ahmed', {'entities': [(0, 12, 'PERSON')]}),
    ('Caleb O Connor', {'entities': [(0, 13, 'PERSON')]}),
    ('Sofia Rodriguez', {'entities': [(0, 15, 'PERSON')]}),
    ('Anand Kapoor', {'entities': [(0, 12, 'PERSON')]}),
    ('Amara Khan', {'entities': [(0, 10, 'PERSON')]}),
    ('Alexandra Davis', {'entities': [(0, 15, 'PERSON')]}),
    ('Raj Patel', {'entities': [(0, 9, 'PERSON')]}),
    ('Liam Nguyen', {'entities': [(0, 11, 'PERSON')]}),
    ('Aisha Gonzalez', {'entities': [(0, 14, 'PERSON')]}),
    ('Mateo Chavez', {'entities': [(0, 12, 'PERSON')]}),
    ('Elena Kim', {'entities': [(0, 9, 'PERSON')]}),
    ('Omar Al-Farsi', {'entities': [(0, 12, 'PERSON')]}),
    ('Sophie Chang', {'entities': [(0, 12, 'PERSON')]}),
    ('Javier Rodriguez', {'entities': [(0, 15, 'PERSON')]}),
    ('Zara Ali Khan', {'entities': [(0, 13, 'PERSON')]}),
    ('Nina Santoro', {'entities': [(0, 11, 'PERSON')]}),
    ('Abdul Malik', {'entities': [(0, 10, 'PERSON')]}),
    ('Kai Johnson', {'entities': [(0, 11, 'PERSON')]}),
    ('Maya O Connor', {'entities': [(0, 13, 'PERSON')]}),
    ('Caleb Thomas', {'entities': [(0, 11, 'PERSON')]}),
    ('Priya Mehta', {'entities': [(0, 11, 'PERSON')]}),
    ('Leonardo Silva', {'entities': [(0, 13, 'PERSON')]}),
    ('Luna Hernandez', {'entities': [(0, 13, 'PERSON')]}),
    ('Asher Wang', {'entities': [(0, 10, 'PERSON')]}),
    ('Zahra Brown', {'entities': [(0, 10, 'PERSON')]}),
    ('Elijah Kimura', {'entities': [(0, 12, 'PERSON')]}),
    ('Aarav Das', {'entities': [(0, 9, 'PERSON')]}),
    ('Emilia Garcia', {'entities': [(0, 13, 'PERSON')]}),
    ('Kaden Taylor', {'entities': [(0, 11, 'PERSON')]}),
    ('Aarav Thapa', {'entities': [(0, 11, 'PERSON')]}),
    ('Sneha Rai', {'entities': [(0, 9, 'PERSON')]}),
    ('Rohan Joshi', {'entities': [(0, 10, 'PERSON')]}),
    ('Sara Shrestha', {'entities': [(0, 13, 'PERSON')]}),
    ('Amit Tamang', {'entities': [(0, 10, 'PERSON')]}),
    ('Nina Gurung', {'entities': [(0, 10, 'PERSON')]}),
    ('Rajesh Lama', {'entities': [(0, 10, 'PERSON')]}),
    ('Priya Khatri', {'entities': [(0, 11, 'PERSON')]}),
    ('Suman Rai', {'entities': [(0, 9, 'PERSON')]}),
    ('Ashmita Thakur', {'entities': [(0, 13, 'PERSON')]}),
    ('Anil Magar', {'entities': [(0, 9, 'PERSON')]}),
    ('Sadhana Bhattarai', {'entities': [(0, 15, 'PERSON')]}),
    ('Bikash Singh', {'entities': [(0, 11, 'PERSON')]}),
    ('Saraswati Gautam', {'entities': [(0, 15, 'PERSON')]}),
    ('Arjun Bhandari', {'entities': [(0, 12, 'PERSON')]}),
    ('Anjali Pandey', {'entities': [(0, 12, 'PERSON')]}),
    ('Nabin Thapa', {'entities': [(0, 10, 'PERSON')]}),
    ('Meera Chhetri', {'entities': [(0, 12, 'PERSON')]}),
    ('Roshan Magar', {'entities': [(0, 11, 'PERSON')]}),
    ('Simran Rai', {'entities': [(0, 9, 'PERSON')]}),
    ('Aryan Shakya', {'entities': [(0, 11, 'PERSON')]}),
    ('Sita Ghimire', {'entities': [(0, 11, 'PERSON')]}),
    ('Raj Kumar Joshi', {'entities': [(0, 15, 'PERSON')]}),
    ('Anushka Lama', {'entities': [(0, 11, 'PERSON')]}),
    ('Pramod Adhikari', {'entities': [(0, 14, 'PERSON')]}),
    
    # PRODUCT
    ('iPhone 13', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Dell XPS 15', {'entities': [(0, 11, 'PRODUCT')]}),
    ('PlayStation 5', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Tesla Model 3', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Nike Air Max', {'entities': [(0, 11, 'PRODUCT')]}),
    ('Canon EOS R5', {'entities': [(0, 12, 'PRODUCT')]}),
    ('Samsung Galaxy S21', {'entities': [(0, 18, 'PRODUCT')]}),
    ('MacBook Pro', {'entities': [(0, 11, 'PRODUCT')]}),
    ('Fitbit Charge 5', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Sony WH-1000XM4', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Kindle Paperwhite', {'entities': [(0, 16, 'PRODUCT')]}),
    ('Google Pixel 6', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Bose QuietComfort 35 II', {'entities': [(0, 21, 'PRODUCT')]}),
    ('Adidas Ultraboost', {'entities': [(0, 17, 'PRODUCT')]}),
    ('Microsoft Surface Laptop 4', {'entities': [(0, 26, 'PRODUCT')]}),
    ('GoPro Hero 10', {'entities': [(0, 13, 'PRODUCT')]}),
    ('LG OLED C1', {'entities': [(0, 10, 'PRODUCT')]}),
    ('Nintendo Switch', {'entities': [(0, 16, 'PRODUCT')]}),
    ('OnePlus 9 Pro', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Fujifilm X-T4', {'entities': [(0, 13, 'PRODUCT')]}),
    ('Garmin Forerunner 945', {'entities': [(0, 21, 'PRODUCT')]}),
    ('Sony A7 III', {'entities': [(0, 11, 'PRODUCT')]}),
    ('Vitamix 5200', {'entities': [(0, 11, 'PRODUCT')]}),
    ('Logitech MX Master 3', {'entities': [(0, 19, 'PRODUCT')]}),
    ('Xbox Series X', {'entities': [(0, 13, 'PRODUCT')]}),
    ('AirPods Pro', {'entities': [(0, 11, 'PRODUCT')]}),
    ('Razer Blade 15', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Huawei MateBook X Pro', {'entities': [(0, 19, 'PRODUCT')]}),
    ('Canon 5D Mark IV', {'entities': [(0, 16, 'PRODUCT')]}),
    ('Bose SoundLink Revolve', {'entities': [(0, 21, 'PRODUCT')]}),
    ('Microsoft Xbox Elite Series 2', {'entities': [(0, 28, 'PRODUCT')]}),
    ('Ring Video Doorbell Pro', {'entities': [(0, 23, 'PRODUCT')]}),
    ('Garmin Instinct Solar', {'entities': [(0, 20, 'PRODUCT')]}),
    ('JBL Charge 4', {'entities': [(0, 12, 'PRODUCT')]}),
    ('DJI Mavic Air 2', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Samsung Odyssey G9', {'entities': [(0, 16, 'PRODUCT')]}),
    ('Apple Watch Series 7', {'entities': [(0, 20, 'PRODUCT')]}),
    ('Nikon Z6', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Sonos Beam', {'entities': [(0, 11, 'PRODUCT')]}),
    ('Lenovo Legion 5', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Philips Hue Starter Kit', {'entities': [(0, 21, 'PRODUCT')]}),
    ('Fitbit Versa 3', {'entities': [(0, 13, 'PRODUCT')]}),
    ('Sony WF-1000XM4', {'entities': [(0, 14, 'PRODUCT')]}),
    ('LG Gram 17', {'entities': [(0, 10, 'PRODUCT')]}),
    ('Breville Smart Oven Air', {'entities': [(0, 23, 'PRODUCT')]}),
    ('Panasonic Lumix GH5', {'entities': [(0, 17, 'PRODUCT')]}),
    ('Google Nest Hub Max', {'entities': [(0, 18, 'PRODUCT')]}),
    ('Samsung QLED Q90T', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Logitech G Pro X Wireless', {'entities': [(0, 24, 'PRODUCT')]}),
    ('Roku Ultra', {'entities': [(0, 10, 'PRODUCT')]}),
    ('Canon PIXMA MX922', {'entities': [(0, 16, 'PRODUCT')]}),
    ('Sony Xperia 1 III', {'entities': [(0, 16, 'PRODUCT')]}),
     ('Laptop', {'entities': [(0, 6, 'PRODUCT')]}),
    ('Smartphone', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Camera', {'entities': [(0, 6, 'PRODUCT')]}),
    ('Headphones', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Smartwatch', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Tablet', {'entities': [(0, 5, 'PRODUCT')]}),
    ('Gaming Console', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Printer', {'entities': [(0, 7, 'PRODUCT')]}),
    ('Fitness Tracker', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Kitchen Appliance', {'entities': [(0, 17, 'PRODUCT')]}),
    ('Desktop Computer', {'entities': [(0, 16, 'PRODUCT')]}),
    ('Home Theater System', {'entities': [(0, 18, 'PRODUCT')]}),
    ('Wireless Speaker', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Blender', {'entities': [(0, 6, 'PRODUCT')]}),
    ('Vacuum Cleaner', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Coffee Maker', {'entities': [(0, 12, 'PRODUCT')]}),
    ('Smart Bulbs', {'entities': [(0, 11, 'PRODUCT')]}),
    ('Gaming Laptop', {'entities': [(0, 13, 'PRODUCT')]}),
    ('External Hard Drive', {'entities': [(0, 19, 'PRODUCT')]}),
    ('Electric Toothbrush', {'entities': [(0, 18, 'PRODUCT')]}),
    ('Router', {'entities': [(0, 6, 'PRODUCT')]}),
    ('Mouse', {'entities': [(0, 4, 'PRODUCT')]}),
    ('Keyboard', {'entities': [(0, 7, 'PRODUCT')]}),
    ('Portable Charger', {'entities': [(0, 16, 'PRODUCT')]}),
    ('Sunglasses', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Backpack', {'entities': [(0, 7, 'PRODUCT')]}),
    ('Fitness Equipment', {'entities': [(0, 17, 'PRODUCT')]}),
    ('Wireless Earbuds', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Projector', {'entities': [(0, 8, 'PRODUCT')]}),
    ('Sleeping Bag', {'entities': [(0, 12, 'PRODUCT')]}),
    ('Blu-ray Player', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Action Camera', {'entities': [(0, 13, 'PRODUCT')]}),
    ('Car Accessories', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Office Chair', {'entities': [(0, 11, 'PRODUCT')]}),
    ('Tool Kit', {'entities': [(0, 8, 'PRODUCT')]}),
    ('Dumbbells', {'entities': [(0, 8, 'PRODUCT')]}),
    ('Digital Watch', {'entities': [(0, 12, 'PRODUCT')]}),
    ('Security Camera', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Telescope', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Hair Dryer', {'entities': [(0, 10, 'PRODUCT')]}),
    ('Power Bank', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Binoculars', {'entities': [(0, 9, 'PRODUCT')]}),
    ('Smart Thermostat', {'entities': [(0, 16, 'PRODUCT')]}),
    ('Air Purifier', {'entities': [(0, 12, 'PRODUCT')]}),
    ('Digital Camera', {'entities': [(0, 14, 'PRODUCT')]}),
    ('Electric Shaver', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Portable Speaker', {'entities': [(0, 15, 'PRODUCT')]}),
    ('Waffle Maker', {'entities': [(0, 12, 'PRODUCT')]}),
     
    # QUANTITY
    ('5 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('3 liters', {'entities': [(0, 9, 'QUANTITY')]}),
    ('2 dozen', {'entities': [(0, 8, 'QUANTITY')]}),
    ('10 meters', {'entities': [(0, 9, 'QUANTITY')]}),
    ('4 units', {'entities': [(0, 8, 'QUANTITY')]}),
    ('8 hours', {'entities': [(0, 8, 'QUANTITY')]}),
    ('15 pieces', {'entities': [(0, 10, 'QUANTITY')]}),
    ('6 packs', {'entities': [(0, 8, 'QUANTITY')]}),
    ('20 centimeters', {'entities': [(0, 14, 'QUANTITY')]}),
    ('7 bottles', {'entities': [(0, 9, 'QUANTITY')]}),
    ('1 pair', {'entities': [(0, 6, 'QUANTITY')]}),
    ('12 boxes', {'entities': [(0, 9, 'QUANTITY')]}),
    ('3 dozen', {'entities': [(0, 8, 'QUANTITY')]}),
    ('25 units', {'entities': [(0, 9, 'QUANTITY')]}),
    ('9 feet', {'entities': [(0, 7, 'QUANTITY')]}),
    ('30 grams', {'entities': [(0, 8, 'QUANTITY')]}),
    ('5 pieces', {'entities': [(0, 9, 'QUANTITY')]}),
    ('2 liters', {'entities': [(0, 8, 'QUANTITY')]}),
    ('8 boxes', {'entities': [(0, 8, 'QUANTITY')]}),
    ('15 meters', {'entities': [(0, 10, 'QUANTITY')]}),
    ('4 pairs', {'entities': [(0, 8, 'QUANTITY')]}),
    ('10 dozen', {'entities': [(0, 9, 'QUANTITY')]}),
    ('6 bottles', {'entities': [(0, 9, 'QUANTITY')]}),
    ('3 units', {'entities': [(0, 8, 'QUANTITY')]}),
    ('18 packs', {'entities': [(0, 9, 'QUANTITY')]}),
    
    # TIME
    ('3 hours', {'entities': [(0, 7, 'TIME')]}),
    ('1 week', {'entities': [(0, 6, 'TIME')]}),
    ('2 days', {'entities': [(0, 6, 'TIME')]}),
    ('4 months', {'entities': [(0, 8, 'TIME')]}),
    ('10 minutes', {'entities': [(0, 10, 'TIME')]}),
    ('5 years', {'entities': [(0, 7, 'TIME')]}),
    ('6 weeks', {'entities': [(0, 7, 'TIME')]}),
    ('8 days', {'entities': [(0, 6, 'TIME')]}),
    ('15 hours', {'entities': [(0, 8, 'TIME')]}),
    ('2 weeks', {'entities': [(0, 7, 'TIME')]}),
    ('7 months', {'entities': [(0, 8, 'TIME')]}),
    ('3 years', {'entities': [(0, 7, 'TIME')]}),
    ('1 day', {'entities': [(0, 5, 'TIME')]}),
    ('4 hours', {'entities': [(0, 7, 'TIME')]}),
    ('12 weeks', {'entities': [(0, 8, 'TIME')]}),
    ('6 months', {'entities': [(0, 8, 'TIME')]}),
    ('9 days', {'entities': [(0, 6, 'TIME')]}),
    ('20 minutes', {'entities': [(0, 10, 'TIME')]}),
    ('5 weeks', {'entities': [(0, 7, 'TIME')]}),
    ('3 minutes', {'entities': [(0, 8, 'TIME')]}),
    ('8 months', {'entities': [(0, 8, 'TIME')]}),
    ('1 hour', {'entities': [(0, 6, 'TIME')]}),
    ('10 days', {'entities': [(0, 7, 'TIME')]}),
    ('2 months', {'entities': [(0, 8, 'TIME')]}),
    
    # WORK_OF_ART
    ('Mona Lisa', {'entities': [(0, 9, 'WORK_OF_ART')]}),
    ('Starry Night', {'entities': [(0, 12, 'WORK_OF_ART')]}),
    ('The Persistence of Memory', {'entities': [(0, 25, 'WORK_OF_ART')]}),
    ('Guernica', {'entities': [(0, 7, 'WORK_OF_ART')]}),
    ('The Last Supper', {'entities': [(0, 16, 'WORK_OF_ART')]}),
    ('The Scream', {'entities': [(0, 11, 'WORK_OF_ART')]}),
    ('Water Lilies', {'entities': [(0, 13, 'WORK_OF_ART')]}),
    ('The Birth of Venus', {'entities': [(0, 18, 'WORK_OF_ART')]}),
    ('The Thinker', {'entities': [(0, 12, 'WORK_OF_ART')]}),
    ('The Girl with a Pearl Earring', {'entities': [(0, 28, 'WORK_OF_ART')]}),
    ('David', {'entities': [(0, 5, 'WORK_OF_ART')]}),
    ('Les Demoiselles d\'Avignon', {'entities': [(0, 26, 'WORK_OF_ART')]}),
    ('The Starry Night', {'entities': [(0, 15, 'WORK_OF_ART')]}),
    ('The Great Wave off Kanagawa', {'entities': [(0, 27, 'WORK_OF_ART')]}),
    ('American Gothic', {'entities': [(0, 15, 'WORK_OF_ART')]}),
    ('The Night Watch', {'entities': [(0, 15, 'WORK_OF_ART')]}),
    ('The School of Athens', {'entities': [(0, 19, 'WORK_OF_ART')]}),
    ('The Kiss', {'entities': [(0, 9, 'WORK_OF_ART')]}),
    ('The Birth of Adam', {'entities': [(0, 16, 'WORK_OF_ART')]}),
    ('The Death of Sardanapalus', {'entities': [(0, 25, 'WORK_OF_ART')]}),
    ('The Creation of Adam', {'entities': [(0, 20, 'WORK_OF_ART')]}),
    ('Liberty Leading the People', {'entities': [(0, 25, 'WORK_OF_ART')]}),
    ('The Garden of Earthly Delights', {'entities': [(0, 29, 'WORK_OF_ART')]}),
    ('The Arnolfini Portrait', {'entities': [(0, 23, 'WORK_OF_ART')]}),
    ('The Birth of Venus', {'entities': [(0, 18, 'WORK_OF_ART')]})
]

In [61]:
involvement_DATA = [
    ('Volunteer at Local Animal Shelter', {'entities': [(0, 30, 'INVOLVEMENT')]}),
    ('Student Ambassador for Red Cross', {'entities': [(0, 31, 'INVOLVEMENT')]}),
    ('Organized Coding Bootcamp for High School Students', {'entities': [(0, 48, 'INVOLVEMENT')]}),
    ('Hackathon Organizer at TechFest 2022', {'entities': [(0, 39, 'INVOLVEMENT')]}),
    ('Participated in Hult Prize 2021', {'entities': [(0, 28, 'INVOLVEMENT')]}),
    ('Mentor at Code for Kids Workshop', {'entities': [(0, 32, 'INVOLVEMENT')]}),
    ('Volunteer at Local Food Bank', {'entities': [(0, 30, 'INVOLVEMENT')]}),
    ('Student Ambassador for NAAMI', {'entities': [(0, 27, 'INVOLVEMENT')]}),
    ('Organized Datathon for Data Science Club', {'entities': [(0, 41, 'INVOLVEMENT')]}),
    ('Participant in Coding Competition 2023', {'entities': [(0, 38, 'INVOLVEMENT')]}),
    ('Hult Prize Regional Summit Participant', {'entities': [(0, 36, 'INVOLVEMENT')]}),
    ('Community Service at Local Homeless Shelter', {'entities': [(0, 42, 'INVOLVEMENT')]}),
    ('Student Ambassador for ANAIS', {'entities': [(0, 30, 'INVOLVEMENT')]}),
    ('Organized Code with Coffee Event', {'entities': [(0, 33, 'INVOLVEMENT')]}),
    ('Participant in National Coding Challenge', {'entities': [(0, 38, 'INVOLVEMENT')]}),
    ('Mentor at Django Workshop', {'entities': [(0, 25, 'INVOLVEMENT')]}),
    ('Volunteer at Local Environmental Cleanup', {'entities': [(0, 38, 'INVOLVEMENT')]}),
    ('Organized Machine Learning Seminar', {'entities': [(0, 34, 'INVOLVEMENT')]}),
    ('Participant in CodeJam 2022', {'entities': [(0, 27, 'INVOLVEMENT')]}),
    ('Hult Prize On-Campus: 1st Runner Up', {'entities': [(0, 31, 'INVOLVEMENT')]}),
    ('Coding Competition Judge at Tech Expo', {'entities': [(0, 38, 'INVOLVEMENT')]}),
    ('Community Outreach for STEM Education', {'entities': [(0, 37, 'INVOLVEMENT')]}),
    ('Student Ambassador for i-CES', {'entities': [(0, 29, 'INVOLVEMENT')]}),
    ('Organized AI Workshop for Beginners', {'entities': [(0, 36, 'INVOLVEMENT')]}),
    ('Participant in Data Science Datathon', {'entities': [(0, 35, 'INVOLVEMENT')]}),
    ('Organized Photography Exhibition for Charity', {'entities': [(0, 45, 'INVOLVEMENT')]}),
    ('Mentorship Program for Underprivileged Youth', {'entities': [(0, 41, 'INVOLVEMENT')]}),
    ('Participated in Poetry Slam Event', {'entities': [(0, 32, 'INVOLVEMENT')]}),
    ('Facilitator at Mindfulness and Wellness Workshop', {'entities': [(0, 45, 'INVOLVEMENT')]}),
    ('Community Gardening Project Coordinator', {'entities': [(0, 38, 'INVOLVEMENT')]}),
    ('Youth Empowerment Symposium Organizer', {'entities': [(0, 35, 'INVOLVEMENT')]}),
    ('Coding Club Advisor at Local High School', {'entities': [(0, 38, 'INVOLVEMENT')]}),
    ('Participant in Model United Nations Conference', {'entities': [(0, 42, 'INVOLVEMENT')]}),
    ('Public Speaking Coach for Toastmasters Club', {'entities': [(0, 42, 'INVOLVEMENT')]}),
    ('Organizer of Environmental Sustainability Campaign', {'entities': [(0, 47, 'INVOLVEMENT')]}),
    ('Facilitated Art Therapy Sessions for Mental Health', {'entities': [(0, 48, 'INVOLVEMENT')]}),
    ('Community Outreach for Tech Literacy Program', {'entities': [(0, 43, 'INVOLVEMENT')]}),
    ('Participant in Cultural Exchange Program', {'entities': [(0, 38, 'INVOLVEMENT')]}),
    ('Facilitator for Creative Writing Workshop', {'entities': [(0, 37, 'INVOLVEMENT')]}),
    ('Organizer of Science Fair for Local Schools', {'entities': [(0, 42, 'INVOLVEMENT')]}),
    ('Volunteer Coordinator for Disaster Relief Team', {'entities': [(0, 45, 'INVOLVEMENT')]}),
    ('Mentor for First-Generation College Students', {'entities': [(0, 40, 'INVOLVEMENT')]}),
    ('Organized Inclusive Gaming Tournament', {'entities': [(0, 34, 'INVOLVEMENT')]}),
    ('Participant in Innovation Challenge Competition', {'entities': [(0, 45, 'INVOLVEMENT')]}),
    ('Facilitator for Team Building Workshops', {'entities': [(0, 39, 'INVOLVEMENT')]}),
    ('Organizer of Diversity and Inclusion Symposium', {'entities': [(0, 47, 'INVOLVEMENT')]}),
    ('Mentor for LGBTQ+ Support Group', {'entities': [(0, 31, 'INVOLVEMENT')]}),
    ('Participant in Global Health Awareness Campaign', {'entities': [(0, 45, 'INVOLVEMENT')]}),
    ('Facilitator for Coding for Kids Program', {'entities': [(0, 39, 'INVOLVEMENT')]}),
    ('Organized TEDx Event on Technology and Innovation', {'entities': [(0, 49, 'INVOLVEMENT')]})
]

In [62]:
socials_DATA = [
    ('LinkedIn: https://www.linkedin.com/in/johndoe', {'entities': [(0, 50, 'SOCIALS')]}),
    ('Twitter: @johndoe_tweets', {'entities': [(0, 21, 'SOCIALS')]}),
    ('GitHub: https://github.com/johndoe', {'entities': [(0, 37, 'SOCIALS')]}),
    ('Instagram: @johndoe_photos', {'entities': [(0, 25, 'SOCIALS')]}),
    ('Facebook: https://www.facebook.com/johndoe', {'entities': [(0, 45, 'SOCIALS')]}),
    ('Email: johndoe@example.com', {'entities': [(0, 23, 'SOCIALS')]}),
    ('Phone: +1 (555) 123-4567', {'entities': [(0, 21, 'SOCIALS')]}),
    ('LinkedIn Profile: linkedin.com/in/janedoe', {'entities': [(0, 39, 'SOCIALS')]}),
    ('Twitter Handle: @janedoe_tweets', {'entities': [(0, 25, 'SOCIALS')]}),
    ('GitHub Account: github.com/janedoe', {'entities': [(0, 33, 'SOCIALS')]}),
    ('Instagram Username: @janedoe_photos', {'entities': [(0, 29, 'SOCIALS')]}),
    ('Facebook Profile: facebook.com/janedoe', {'entities': [(0, 35, 'SOCIALS')]}),
    ('Email Address: janedoe@example.com', {'entities': [(0, 28, 'SOCIALS')]}),
    ('Phone Number: +1 (555) 987-6543', {'entities': [(0, 26, 'SOCIALS')]}),
    ('LinkedIn: linkedin.com/in/alexsmith', {'entities': [(0, 35, 'SOCIALS')]}),
    ('Twitter: @alexsmith_tweets', {'entities': [(0, 24, 'SOCIALS')]}),
    ('GitHub: github.com/alexsmith', {'entities': [(0, 29, 'SOCIALS')]}),
    ('Instagram: @alexsmith_photos', {'entities': [(0, 26, 'SOCIALS')]}),
    ('Facebook: facebook.com/alexsmith', {'entities': [(0, 31, 'SOCIALS')]}),
    ('Email: alexsmith@example.com', {'entities': [(0, 21, 'SOCIALS')]}),
    ('Phone: +1 (555) 789-0123', {'entities': [(0, 21, 'SOCIALS')]}),
    ('LinkedIn Profile: https://linkedin.com/in/sarahjane', {'entities': [(0, 47, 'SOCIALS')]}),
    ('Twitter Handle: @sarahjane_tweets', {'entities': [(0, 28, 'SOCIALS')]}),
    ('GitHub Account: https://github.com/sarahjane', {'entities': [(0, 41, 'SOCIALS')]}),
    ('Instagram Username: @sarahjane_photos', {'entities': [(0, 34, 'SOCIALS')]}),
    ('Facebook Profile: https://facebook.com/sarahjane', {'entities': [(0, 39, 'SOCIALS')]}),
    ('Email Address: sarahjane@example.com', {'entities': [(0, 28, 'SOCIALS')]}),
    ('Phone Number: +1 (555) 345-6789', {'entities': [(0, 26, 'SOCIALS')]}),
    ('LinkedIn: linkedin.com/in/markjohnson', {'entities': [(0, 36, 'SOCIALS')]}),
    ('Twitter: @markjohnson_tweets', {'entities': [(0, 25, 'SOCIALS')]}),
    ('GitHub: github.com/markjohnson', {'entities': [(0, 30, 'SOCIALS')]}),
    ('Instagram: @markjohnson_photos', {'entities': [(0, 27, 'SOCIALS')]}),
    ('Facebook: facebook.com/markjohnson', {'entities': [(0, 32, 'SOCIALS')]}),
    ('Email: markjohnson@example.com', {'entities': [(0, 22, 'SOCIALS')]}),
    ('Phone: +1 (555) 678-9012', {'entities': [(0, 21, 'SOCIALS')]}),
    ('@johndoe_tweets', {'entities': [(0, 16, 'SOCIALS')]}),
    ('https://github.com/johndoe', {'entities': [(0, 29, 'SOCIALS')]}),
    ('linkedin.com/in/johndoe', {'entities': [(0, 28, 'SOCIALS')]}),
    ('@john_doe_tweets', {'entities': [(0, 18, 'SOCIALS')]}),
    ('github.com/johndoe', {'entities': [(0, 25, 'SOCIALS')]}),
    ('@john_doe_photos', {'entities': [(0, 21, 'SOCIALS')]}),
    ('facebook.com/johndoe', {'entities': [(0, 22, 'SOCIALS')]}),
    ('john.doe@example.com', {'entities': [(0, 20, 'SOCIALS')]}),
    ('+1 (555) 123-7890', {'entities': [(0, 18, 'SOCIALS')]}),
    ('linkedin.com/in/alice_smith', {'entities': [(0, 32, 'SOCIALS')]}),
    ('@alice_tweets', {'entities': [(0, 14, 'SOCIALS')]}),
    ('github.com/alicesmith', {'entities': [(0, 21, 'SOCIALS')]}),
    ('@alice_photos', {'entities': [(0, 16, 'SOCIALS')]}),
    ('facebook.com/alicesmith', {'entities': [(0, 21, 'SOCIALS')]}),
    ('alice.smith@example.com', {'entities': [(0, 20, 'SOCIALS')]}),
    ('+1 (555) 987-6543', {'entities': [(0, 18, 'SOCIALS')]}),
    ('linkedin.com/in/mark_jones', {'entities': [(0, 28, 'SOCIALS')]}),
    ('@mark_tweets', {'entities': [(0, 14, 'SOCIALS')]}),
    ('github.com/markjones', {'entities': [(0, 19, 'SOCIALS')]}),
    ('@mark_photos', {'entities': [(0, 16, 'SOCIALS')]}),
    ('facebook.com/markjones', {'entities': [(0, 19, 'SOCIALS')]})
]

In [63]:
education_DATA = [
    ('B.Tech in Computer Science', {'entities': [(0, 25, 'EDUCATION')]}),
    ('Bachelors of Computer Engineering, ABC University', {'entities': [(0, 49, 'EDUCATION')]}),
    ('M.S. in Artificial Intelligence, XYZ Institute of Technology', {'entities': [(0, 58, 'EDUCATION')]}),
    ('Bachelor of Science in Information Technology, University of ABC', {'entities': [(0, 61, 'EDUCATION')]}),
    ('Ph.D. in Computer Science, DEF Research Institute', {'entities': [(0, 49, 'EDUCATION')]}),
    ('Diploma in Web Development, Institute of XYZ', {'entities': [(0, 42, 'EDUCATION')]}),
    ('Master of Computer Applications, LMN University', {'entities': [(0, 49, 'EDUCATION')]}),
    ('B.E. in Electronics and Communication, PQR College', {'entities': [(0, 51, 'EDUCATION')]}),
    ('Bachelor of Technology in Mechanical Engineering, LMN Institute', {'entities': [(0, 66, 'EDUCATION')]}),
    ('M.Sc. in Data Science, University of XYZ', {'entities': [(0, 40, 'EDUCATION')]}),
    ('Bachelors in Software Engineering, ABCD College of Technology', {'entities': [(0, 57, 'EDUCATION')]}),
    ('Master of Science in Cybersecurity, LMN School of Information Security', {'entities': [(0, 68, 'EDUCATION')]}),
    ('Diploma in Computer Networks, LMN Institute of Technology', {'entities': [(0, 53, 'EDUCATION')]}),
    ('Bachelor of Arts in Graphic Design, University of ABCD', {'entities': [(0, 48, 'EDUCATION')]}),
    ('M.Tech in Robotics and Automation, XYZ College of Engineering', {'entities': [(0, 59, 'EDUCATION')]}),
    ('Associate Degree in Information Systems, PQR Community College', {'entities': [(0, 59, 'EDUCATION')]}),
    ('Ph.D. in Computational Linguistics, LMN Language Research Institute', {'entities': [(0, 69, 'EDUCATION')]}),
    ('Master of Business Administration, ABCD School of Management', {'entities': [(0, 57, 'EDUCATION')]}),
    ('B.Sc. in Mathematics and Statistics, University of XYZ', {'entities': [(0, 52, 'EDUCATION')]}),
    ('Postgraduate Diploma in Machine Learning, LMN Academy of Technology', {'entities': [(0, 66, 'EDUCATION')]}),
    ('Certificate in Digital Marketing, PQR Marketing Institute', {'entities': [(0, 54, 'EDUCATION')]}),
    ('Bachelor of Fine Arts in Animation, ABCD Art School', {'entities': [(0, 48, 'EDUCATION')]}),
    ('Masters in Environmental Science, LMN Institute of Environmental Studies', {'entities': [(0, 70, 'EDUCATION')]}),
    ('Certification in Ethical Hacking', {'entities': [(0, 27, 'EDUCATION')]}),
    ('Diploma in Data Science and Machine Learning', {'entities': [(0, 43, 'EDUCATION')]}),
    ('Associate Degree in Network Administration', {'entities': [(0, 38, 'EDUCATION')]}),
    ('Certified Information Systems Security Professional (CISSP)', {'entities': [(0, 58, 'EDUCATION')]}),
    ('AWS Certified Solutions Architect - Associate', {'entities': [(0, 47, 'EDUCATION')]}),
    ('CompTIA A+ Certification', {'entities': [(0, 23, 'EDUCATION')]}),
    ('Microsoft Certified: Azure Fundamentals', {'entities': [(0, 42, 'EDUCATION')]}),
    ('Certification in Python Programming', {'entities': [(0, 34, 'EDUCATION')]}),
    ('Red Hat Certified Engineer (RHCE)', {'entities': [(0, 32, 'EDUCATION')]}),
    ('Certified ScrumMaster (CSM)', {'entities': [(0, 26, 'EDUCATION')]}),
    ('Project Management Professional (PMP)', {'entities': [(0, 34, 'EDUCATION')]}),
    ('Cisco Certified Network Associate (CCNA)', {'entities': [(0, 41, 'EDUCATION')]}),
    ('Certification in Mobile App Development', {'entities': [(0, 38, 'EDUCATION')]})
]

In [64]:
experience_DATA = [
    ('Software Engineer at ABC Tech Solutions', {'entities': [(0, 38, 'EXPERIENCE')]}),
    ('Data Scientist Intern at XYZ Analytics', {'entities': [(0, 38, 'EXPERIENCE')]}),
    ('Product Manager at Acme Innovations', {'entities': [(0, 36, 'EXPERIENCE')]}),
    ('Web Developer at TechHub Solutions', {'entities': [(0, 34, 'EXPERIENCE')]}),
    ('UX/UI Designer at Creative Minds Studio', {'entities': [(0, 38, 'EXPERIENCE')]}),
    ('Network Administrator at Global Networks Inc.', {'entities': [(0, 44, 'EXPERIENCE')]}),
    ('Machine Learning Engineer at Data Insights Ltd.', {'entities': [(0, 48, 'EXPERIENCE')]}),
    ('IT Consultant at Smart Solutions Group', {'entities': [(0, 38, 'EXPERIENCE')]}),
    ('Cybersecurity Analyst at Secure Systems Corp', {'entities': [(0, 42, 'EXPERIENCE')]}),
    ('Business Intelligence Analyst at Insightful Analytics', {'entities': [(0, 51, 'EXPERIENCE')]}),
    ('Software Development Manager at Tech Innovators Ltd.', {'entities': [(0, 53, 'EXPERIENCE')]}),
    ('DevOps Engineer at CloudOps Technologies', {'entities': [(0, 39, 'EXPERIENCE')]}),
    ('Quality Assurance Tester at QA Solutions Inc.', {'entities': [(0, 43, 'EXPERIENCE')]}),
    ('Digital Marketing Specialist at Brand Boosters', {'entities': [(0, 47, 'EXPERIENCE')]}),
    ('System Administrator at Tech Systems Support', {'entities': [(0, 44, 'EXPERIENCE')]}),
    ('AI Research Scientist at Neural Networks Research Lab', {'entities': [(0, 54, 'EXPERIENCE')]}),
    ('Full Stack Developer at CodeCrafters Studio', {'entities': [(0, 45, 'EXPERIENCE')]}),
    ('Software Architect at Innovate Solutions Inc.', {'entities': [(0, 45, 'EXPERIENCE')]}),
    ('Software Engineer', {'entities': [(0, 16, 'EXPERIENCE')]}),
    ('Data Scientist', {'entities': [(0, 14, 'EXPERIENCE')]}),
    ('Product Manager', {'entities': [(0, 15, 'EXPERIENCE')]}),
    ('Web Developer', {'entities': [(0, 13, 'EXPERIENCE')]}),
    ('UX/UI Designer', {'entities': [(0, 14, 'EXPERIENCE')]}),
    ('Network Administrator', {'entities': [(0, 21, 'EXPERIENCE')]}),
    ('Machine Learning Engineer', {'entities': [(0, 25, 'EXPERIENCE')]}),
    ('IT Consultant', {'entities': [(0, 13, 'EXPERIENCE')]}),
    ('Cybersecurity Analyst', {'entities': [(0, 20, 'EXPERIENCE')]}),
    ('Business Intelligence Analyst', {'entities': [(0, 25, 'EXPERIENCE')]}),
    ('Software Development Manager', {'entities': [(0, 27, 'EXPERIENCE')]}),
    ('DevOps Engineer', {'entities': [(0, 16, 'EXPERIENCE')]}),
    ('Quality Assurance Tester', {'entities': [(0, 22, 'EXPERIENCE')]}),
    ('Digital Marketing Specialist', {'entities': [(0, 26, 'EXPERIENCE')]}),
    ('System Administrator', {'entities': [(0, 19, 'EXPERIENCE')]}),
    ('AI Research Scientist', {'entities': [(0, 21, 'EXPERIENCE')]}),
    ('Full Stack Developer', {'entities': [(0, 21, 'EXPERIENCE')]}),
    ('Software Architect', {'entities': [(0, 18, 'EXPERIENCE')]}),
    ('Business Analyst', {'entities': [(0, 16, 'EXPERIENCE')]}),
    ('Project Manager', {'entities': [(0, 15, 'EXPERIENCE')]}),
    ('UI/UX Lead', {'entities': [(0, 11, 'EXPERIENCE')]}),
    ('Data Engineer', {'entities': [(0, 13, 'EXPERIENCE')]}),
    ('Security Consultant', {'entities': [(0, 19, 'EXPERIENCE')]}),
    ('Frontend Developer', {'entities': [(0, 18, 'EXPERIENCE')]}),
    ('Cloud Solutions Architect', {'entities': [(0, 24, 'EXPERIENCE')]}),
    ('Product Owner', {'entities': [(0, 13, 'EXPERIENCE')]}),
    ('IT Support Specialist', {'entities': [(0, 20, 'EXPERIENCE')]}),
    ('Business Development Manager', {'entities': [(0, 27, 'EXPERIENCE')]}),
    ('Data Analyst', {'entities': [(0, 12, 'EXPERIENCE')]}),
    ('Machine Learning Researcher', {'entities': [(0, 26, 'EXPERIENCE')]}),
    ('QA Automation Engineer', {'entities': [(0, 21, 'EXPERIENCE')]}),
    ('Digital Content Strategist', {'entities': [(0, 26, 'EXPERIENCE')]}),
    ('Systems Analyst', {'entities': [(0, 15, 'EXPERIENCE')]}),
    ('Technical Writer', {'entities': [(0, 16, 'EXPERIENCE')]}),
    ('IT Project Coordinator', {'entities': [(0, 22, 'EXPERIENCE')]}),
    ('Data Architect', {'entities': [(0, 14, 'EXPERIENCE')]}),
    ('Mobile App Developer', {'entities': [(0, 20, 'EXPERIENCE')]}),
    ('Network Engineer', {'entities': [(0, 15, 'EXPERIENCE')]}),
    ('ERP Consultant', {'entities': [(0, 15, 'EXPERIENCE')]}),
    ('Digital Marketing Manager', {'entities': [(0, 24, 'EXPERIENCE')]}),
    ('Infrastructure Engineer', {'entities': [(0, 22, 'EXPERIENCE')]}),
    ('IT Trainer', {'entities': [(0, 8, 'EXPERIENCE')]}),
    ('Database Administrator', {'entities': [(0, 22, 'EXPERIENCE')]}),
    ('IT Auditor', {'entities': [(0, 10, 'EXPERIENCE')]}),
    ('SEO Specialist', {'entities': [(0, 14, 'EXPERIENCE')]}),
    ('IT Sales Executive', {'entities': [(0, 18, 'EXPERIENCE')]}),
    ('IT Recruiter', {'entities': [(0, 12, 'EXPERIENCE')]}),
    ('Blockchain Developer', {'entities': [(0, 20, 'EXPERIENCE')]}),
    ('IT Operations Manager', {'entities': [(0, 20, 'EXPERIENCE')]}),
    ('Frontend UI Developer', {'entities': [(0, 20, 'EXPERIENCE')]}),
    ('Data Privacy Officer', {'entities': [(0, 20, 'EXPERIENCE')]}),
    ('SAP Consultant', {'entities': [(0, 14, 'EXPERIENCE')]}),
    ('Systems Integrator', {'entities': [(0, 18, 'EXPERIENCE')]}),
    ('Cybersecurity Specialist', {'entities': [(0, 25, 'EXPERIENCE')]}),
    ('Computer Vision Engineer', {'entities': [(0, 23, 'EXPERIENCE')]})
]

In [66]:
all_train_DATA = []
all_train_DATA.extend(skills_DATA)
all_train_DATA.extend(tenure_DATA)
all_train_DATA.extend(experience_DATA)
all_train_DATA.extend(education_DATA)
all_train_DATA.extend(socials_DATA)
all_train_DATA.extend(other_label_DATA)
all_train_DATA.extend(involvement_DATA)
all_train_DATA.extend(course_DATA)

In [67]:
len(all_train_DATA)

1473

In [73]:
nlp = spacy.load("en_core_web_sm")
ner = nlp.get_pipe('ner')
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [74]:
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
unaffected_pipes

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer']

In [75]:
# Add the new label to ner
Label1 = "TENURE"
Label2 = "SKILLS"
Label3 = "COURSE"
Label4 = "INVOLVEMENT"
Label5 = "EXPERIENCE"
Label6 = "SOCIALS"
Label7 = "EDUCATION"

for i in range(1,8):
    ner.add_label(globals()[f"Label{i}"])

# # Resume training
optimizer = nlp.resume_training()
move_names = list(ner.move_names)

ner.labels

('CARDINAL',
 'COURSE',
 'DATE',
 'EDUCATION',
 'EVENT',
 'EXPERIENCE',
 'FAC',
 'GPE',
 'INVOLVEMENT',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'SKILLS',
 'SOCIALS',
 'TENURE',
 'TIME',
 'WORK_OF_ART')

In [76]:
print("BEFORE")
view_results(nlp(resume))

BEFORE
PERSON: Rupesh Ghimire rupeshghimire007@gmail.com
PERSON: Pashchimanchal Campus
ORG: IOE
PERSON: TU Bachelor
DATE: 2019
PERSON: rupeshghimire7
ORG: rupeshghimire7 INVOLVEMENTS NAAMI  ANAIS-Student
DATE: 2023 - Jun 2023
DATE: Jan 2023 - Feb 2023
PERSON: Django
ORG: MVC
PERSON: Templates
FAC: Rest Framework
GPE: Flask
DATE: May 2022- Jul 2022
ORG: BookSuggestor COURSEWORKSDBMS Project Leveraged RAW SQL
GPE: Django
PERSON: Django
ORG: UNDERGRADUATETelegram Chat Bot Data Structures & Algorithms Operating Systems Database Management System Software Engineering Artificial Intelligence Computer NetworksPersonal Project
PERSON: Machine Learning
PERSON: Numpy Matplotlib
PERSON: Seaborn  Scikit-Learn
PERSON: Keras JavaScript  ReactJS
ORG: ML Projects Personal Project Collection of Regression and Classification
CARDINAL: 10,000,000
DATE: 2030
PERSON: OnCampus
CARDINAL: 1st
PERSON: Participant
PERSON: Wildcard Golden Jubilee Scholarship Scheme Awarded
GPE: Embassy
GPE: India
GPE: Kathmandu


In [77]:
trainer(DATA=all_train_DATA)





































Losses {'ner': 1345.3260859527709}
Losses {'ner': 1273.844510222252}
Losses {'ner': 1127.7232023920496}
Losses {'ner': 997.623292278911}
Losses {'ner': 968.6415489578585}
Losses {'ner': 984.0828607397785}
Losses {'ner': 967.5347789653861}
Losses {'ner': 807.6846313602373}
Losses {'ner': 746.8167370147238}
Losses {'ner': 770.7089653927983}


In [79]:
print("AFTER")
view_results(nlp(resume))

AFTER
ORG: Rupesh Ghimire rupeshghimire007@gmail.com  
TENURE: Jan 2023 - Feb 2023  
COURSE: with Python and Django's MVC architecture, ORM, Templates and Rest Framework.
COURSE: BookSuggestor COURSEWORKSDBMS Project Leveraged RAW SQL for database management using MySQL in Django bypassing Django's ORM.
QUANTITY: 10,000,000 people
ORDINAL: 1st
