# Resume Parser using Spacy

In [1]:
# !pip install pdfminer.six

In [2]:
import spacy
import random
from spacy.training.example import Example
from spacy.util import minibatch, compounding
from pathlib import Path

2024-01-27 17:33:58.934766: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-27 17:34:03.072863: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-27 17:34:03.074410: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

In [3]:
from pdfminer.high_level import extract_text

def extract_text_from_pdf(pdf_path):
    """Return the result of pdfminer's ``extract_text`` for ``pdf_path``."""
    pdf_text = extract_text(pdf_path)
    return pdf_text

In [4]:
def parse_resume(text):
    """Run ``text`` through the notebook-level ``nlp`` pipeline and list its entities.

    Returns a list of ``(entity_text, entity_label)`` tuples, one per item in
    ``doc.ents``, in the order the pipeline reports them.
    """
    parsed = nlp(text)

    # Collect every entity found by the pipeline's NER component.
    found = []
    for entity in parsed.ents:
        found.append((entity.text, entity.label_))
    return found

In [5]:
# Load spaCy's small English pipeline and keep a handle on its NER component,
# which later cells extend with new labels and fine-tune.
nlp = spacy.load("en_core_web_sm")
ner = nlp.get_pipe('ner')
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [6]:
# Components named here stay enabled during training. Every other component of
# the pipeline is collected into `unaffected_pipes`, which trainer() disables
# while it updates the NER component.
# The "trf_*" names do not occur in this pipeline's pipe_names, so only "ner"
# has an effect here.
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
unaffected_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
unaffected_pipes

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer']

In [7]:
# Custom entity labels to add to the NER component.
# The individual LabelN names are kept so that any code referring to them
# keeps working; CUSTOM_LABELS is the single list that is actually iterated.
Label1 = "TENURE"
Label2 = "SKILLS"
Label3 = "COURSE"
Label4 = "INVOLVEMENT"
Label5 = "EXPERIENCE"
Label6 = "SOCIALS"
Label7 = "EDUCATION"
# "ORG" (education and work institutions) is not added here: it is not one of
# the custom labels and is left to the labels the loaded model already has.

CUSTOM_LABELS = [Label1, Label2, Label3, Label4, Label5, Label6, Label7]

# Register every custom label explicitly instead of looking names up through
# globals() with a hand-maintained numeric range.
for custom_label in CUSTOM_LABELS:
    ner.add_label(custom_label)

# Resume training: obtain the optimizer used by trainer() to keep updating the
# already-trained pipeline.
optimizer = nlp.resume_training()
# Snapshot of the NER component's move names (not used in the visible cells).
move_names = list(ner.move_names)

In [8]:
# Display every label the NER component currently knows, including the custom ones.
ner.labels

('CARDINAL',
 'COURSE',
 'DATE',
 'EDUCATION',
 'EVENT',
 'EXPERIENCE',
 'FAC',
 'GPE',
 'INVOLVEMENT',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'SKILLS',
 'SOCIALS',
 'TENURE',
 'TIME',
 'WORK_OF_ART')

In [9]:
# Sample resume, as plain text, that the cells below run through the pipeline
# to inspect the predicted entities.
# NOTE(review): this looks like it contains a real e-mail address and phone
# number — consider redacting before sharing the notebook.
resume = """Rupesh Ghimire
rupeshghimire007@gmail.com | +977-9868155925

EDUCATION
Pashchimanchal Campus, IOE, TU
Bachelor in Computer Engineering
2019-Present
Fusemachines
Micro-Degree in Artificial Intelligence
2023-Present
Fellowship Scholar
LINKS
Github:// rupeshghimire7
LinkedIn:// rupesh-ghimire7
Medium:// rupeshghimire7
LeetCode:// rupeshghimire7
INVOLVEMENTS
NAAMI | ANAIS-Student Ambassador
Apr 2023 - Jun 2023
i-CES | Django Mentor
Jan 2023 - Feb 2023 | Pokhara
Made attendees familiar with Python and Django's MVC
architecture, ORM, Templates and Rest Framework.
Coding Competition (GCES) - 2023
Code with Coffee (i-CES) - 2022
PROJECTS
LIVER CIRRHOSIS PREDICTION
Fusemachines ML Final Project | (Feb 2023- Apr 2023)
Worked on the multiclass classification problem with
various classifiers to predict the stage of patients' liver.
Deployed on Flask.
BCT Study Room
Software Engineering Project | (May 2022- Jul 2022)
A django web-app to enable discussion in groups.
BookSuggestor
COURSEWORKSDBMS Project
Leveraged RAW SQL for database management using
MySQL in Django bypassing Django's ORM.
UNDERGRADUATETelegram Chat Bot
Data Structures & Algorithms
Operating Systems
Database Management System
Software Engineering
Artificial Intelligence
Computer NetworksPersonal Project
Used Telegram API for configuration and python for
implementation.
Machine Learning AlgorithmsACHIEVEMENTS
FUSEMACHINES
PROGRAMMING SKILLS
EXPERIENCED
Python | Django
Tailwind CSS | HTML5 | CSS3
INTERMEDIATE
Pytorch
MySQL
Pandas | Numpy
Matplotlib | Seaborn | Scikit-Learn
RESTful API
C/C++
FAMILIAR
Tensorflow/ Keras
JavaScript | ReactJS
ML Projects
Personal Project
Collection of Regression and Classification projects.
HultPrize 2021
To build viable food enterprises to create jobs, stimulate
economies, reimagine supply chains, and improve outcomes for
10,000,000 people by 2030.
OnCampus: 1st Runner Up
Regional Summit: Participant via Wildcard
Golden Jubilee Scholarship Scheme
Awarded from Embassy of India, Kathmandu for
Undergrad Studies.
LANGUAGES
Nepali - Native Proficiency
English - Professional Working Proficiency"""

In [10]:
# Flatten the resume into a single line: each newline becomes a space and the
# '|' separators are dropped (which leaves double spaces where they stood).
# This overwrites `resume`, so the original multi-line text is no longer available.
resume = resume.replace('\n', ' ').replace('|', '')

### TRAINER FUNCTION

Takes a list of training examples of the form:
        
        [("Pizza is a common fast food.", {"entities": [(0, 5, "FOOD")]}),
        ("Pasta is an italian recipe", {"entities": [(0, 5, "FOOD")]})]
        
Resume Doc is spacy doc object of resume:

In [11]:
def trainer(DATA, n_iter=10, drop=0.5):
    """Fine-tune the NER component of the notebook-level ``nlp`` pipeline.

    Relies on the notebook globals ``nlp``, ``optimizer`` and
    ``unaffected_pipes``.

    Args:
        DATA: training examples as ``(text, annotations)`` pairs, where
            ``annotations`` is a dict such as
            ``{"entities": [(start, end, label)]}``. The caller's list is
            left untouched.
        n_iter: number of passes over the data (default 10, as before).
        drop: dropout rate handed to ``nlp.update`` (default 0.5, as before).

    Returns:
        None. The accumulated losses are printed after every pass.
    """
    # Shuffle a private copy: shuffling DATA itself would silently reorder the
    # caller's list (e.g. TRAIN_DATA) as a side effect of training.
    pool = list(DATA)

    # Only the NER component is updated; the other components are switched off
    # for the duration of the block and restored afterwards.
    # select_pipes is the spaCy v3 replacement for the deprecated disable_pipes.
    with nlp.select_pipes(disable=unaffected_pipes):

        for _ in range(n_iter):

            # New random order for every pass.
            random.shuffle(pool)
            losses = {}

            # Batch sizes follow spaCy's compounding schedule (4 -> 32).
            batches = minibatch(pool, size=compounding(4.0, 32.0, 1.001))

            for batch in batches:
                examples = [
                    Example.from_dict(nlp.make_doc(text), annotation)
                    for text, annotation in batch
                ]

                nlp.update(
                    examples,
                    sgd=optimizer,
                    drop=drop,
                    losses=losses,
                )

            print("Losses", losses)

### Data Generator function

Label -> "SKILLS"

List of data -> ['React', 'Django', 'Machine Learning', 'AI']  

In [12]:
def data_generator(label,data_list,level=None):
    """Turn standalone phrases into spaCy-style NER training examples.

    Each phrase becomes ``(phrase, {"entities": [(0, len(phrase), label)]})``,
    i.e. the whole text is annotated as one entity of type ``label``.

    Args:
        label: entity label assigned to every phrase.
        data_list: iterable of phrase strings.
        level: accepted for backward compatibility; currently unused.

    Returns:
        List of ``(text, annotations)`` tuples in input order. Empty strings
        are skipped because they would produce a zero-length ``(0, 0)`` span.
    """
    return [
        (phrase, {"entities": [(0, len(phrase), label)]})
        for phrase in data_list
        if phrase
    ]

### View Results

In [13]:
def view_results(doc):
    """Print one ``LABEL: text`` line for every entity in ``doc.ents``."""
    for entity in doc.ents:
        line = "{}: {}".format(entity.label_, entity.text)
        print(line)

### Creating data for technical skills, i.e. entity label: SKILLS

In [14]:
# Phrases used as SKILLS training examples.
# NOTE: several names (e.g. "React", "Django", "Pandas") are listed under more
# than one category, so they occur multiple times in this list and therefore
# in the training data built from it.
technical_skills = [
    # Programming Languages
    "Python", "Java", "C++", "JavaScript", "HTML", "CSS", "Ruby", "Go", "Swift", "Kotlin",
    "TypeScript", "Rust", "Scala", "PHP", "C#", "Objective-C",

    # Web Development
    "Django", "Flask", "Node.js", "Express.js", "React", "Angular", "Vue.js", "Next.js",
    "Spring Boot", "Ruby on Rails", "ASP.NET", "Meteor", "HTML5", "CSS3", "Bootstrap",

    # Databases
    "SQL", "MySQL", "PostgreSQL", "MongoDB", "Redis", "SQLite", "Firebase", "Cassandra",

    # Machine Learning / Data Science
    "TensorFlow", "PyTorch", "Scikit-learn", "Keras", "Pandas", "NumPy", "Matplotlib",
    "Seaborn", "NLTK", "Spacy", "Scrapy", "Beautiful Soup",

    # Cloud Computing
    "AWS", "Azure", "Google Cloud Platform (GCP)", "Docker", "Kubernetes", "Heroku",

    # DevOps
    "Jenkins", "Travis CI", "GitLab CI", "Ansible", "Terraform", "Docker Compose",

    # Version Control
    "Git", "GitHub", "Bitbucket", "GitLab",

    # Mobile Development
    "React Native", "Flutter", "Xamarin", "SwiftUI", "Android SDK",

    # Frameworks
    "React", "Angular", "Vue.js", "Django", "Flask", "Ruby on Rails", "Spring Boot", "Express.js",

    # Libraries
    "Pandas", "NumPy", "Matplotlib", "Seaborn", "NLTK", "Spacy", "Scrapy", "Beautiful Soup",

    # Frontend Technologies
    "React", "Angular", "Vue.js", "Next.js", "TypeScript", "Webpack", "Babel", "SASS",

    # Backend Technologies
    "Node.js", "Django", "Flask", "Spring Boot", "Ruby on Rails", "Express.js", "PHP",

    # Mobile Frameworks
    "React Native", "Flutter", "Xamarin", "SwiftUI", "Android SDK",

    # Networking
    "TCP/IP", "HTTP/HTTPS", "DNS", "Load Balancing", "Firewalls", "Proxy Servers",

    # Security
    "Cybersecurity", "Penetration Testing", "Cryptography", "OWASP", "SSL/TLS",

    # Operating Systems
    "Linux", "Windows", "macOS", "Unix",

    # Other Technologies
    "Blockchain", "Serverless", "Microservices", "RESTful API", "GraphQL",

    # Project Management / Agile
    "Scrum", "Kanban", "Agile", "JIRA", "Trello",

    # Miscellaneous
    "Data Manipulation", "Natural Language Processing (NLP)", "Computer Vision",
]
# Proficiency levels; passed to data_generator() as its `level` argument, which
# that function accepts but does not use.
level = ['beginner', 'intermediate', 'expert']

In [15]:
# Build one SKILLS training example per phrase in technical_skills.
TRAIN_DATA = data_generator("SKILLS", technical_skills, level)

# Show the first five and the last five examples, then the total count.
for data in TRAIN_DATA[:5] + TRAIN_DATA[-5:]:
    print(data)
print("TRAIN_DATA length", len(TRAIN_DATA))

('Python', {'entities': [(0, 6, 'SKILLS')]})
('Java', {'entities': [(0, 4, 'SKILLS')]})
('C++', {'entities': [(0, 3, 'SKILLS')]})
('JavaScript', {'entities': [(0, 10, 'SKILLS')]})
('HTML', {'entities': [(0, 4, 'SKILLS')]})
('JIRA', {'entities': [(0, 4, 'SKILLS')]})
('Trello', {'entities': [(0, 6, 'SKILLS')]})
('Data Manipulation', {'entities': [(0, 17, 'SKILLS')]})
('Natural Language Processing (NLP)', {'entities': [(0, 33, 'SKILLS')]})
('Computer Vision', {'entities': [(0, 15, 'SKILLS')]})
TRAIN_DATA length 136


In [16]:
# Baseline: entities predicted for the sample resume before any fine-tuning.
view_results(nlp(resume))

PERSON: Rupesh Ghimire rupeshghimire007@gmail.com
PERSON: Pashchimanchal Campus
ORG: IOE
PERSON: TU Bachelor
DATE: 2019
PERSON: rupeshghimire7
ORG: rupeshghimire7 INVOLVEMENTS NAAMI  ANAIS-Student
DATE: 2023 - Jun 2023
DATE: Jan 2023 - Feb 2023
PERSON: Django
ORG: MVC
PERSON: Templates
FAC: Rest Framework
GPE: Flask
DATE: May 2022- Jul 2022
ORG: BookSuggestor COURSEWORKSDBMS Project Leveraged RAW SQL
GPE: Django
PERSON: Django
ORG: UNDERGRADUATETelegram Chat Bot Data Structures & Algorithms Operating Systems Database Management System Software Engineering Artificial Intelligence Computer NetworksPersonal Project
PERSON: Machine Learning
PERSON: Numpy Matplotlib
PERSON: Seaborn  Scikit-Learn
PERSON: Keras JavaScript  ReactJS
ORG: ML Projects Personal Project Collection of Regression and Classification
CARDINAL: 10,000,000
DATE: 2030
PERSON: OnCampus
CARDINAL: 1st
PERSON: Participant
PERSON: Wildcard Golden Jubilee Scholarship Scheme Awarded
GPE: Embassy
GPE: India
GPE: Kathmandu
PERSON:

In [17]:
# Fine-tune the NER component on the SKILLS examples.
trainer(DATA= TRAIN_DATA)

Losses {'ner': 207.89096086251678}
Losses {'ner': 100.04769532120554}
Losses {'ner': 72.37185845894317}
Losses {'ner': 66.0976101893466}
Losses {'ner': 42.52660317358459}
Losses {'ner': 28.068101785829885}
Losses {'ner': 15.049703359775833}
Losses {'ner': 12.398957576528375}
Losses {'ner': 7.972060559081265}
Losses {'ner': 10.40278832094074}


In [18]:
# Same resume again, to see how the SKILLS training changed the predictions.
view_results(nlp(resume))

SKILLS: Rupesh Ghimire rupeshghimire007@gmail.com
SKILLS: +977-9868155925  
SKILLS: EDUCATION Pashchimanchal Campus
SKILLS: TU Bachelor
SKILLS: in Computer Engineering 2019-Present Fusemachines Micro-Degree
SKILLS: Artificial Intelligence 2023-Present Fellowship Scholar
SKILLS: rupeshghimire7
SKILLS: rupesh-ghimire7 Medium:// rupeshghimire7
SKILLS: LeetCode:// rupeshghimire7 INVOLVEMENTS NAAMI  ANAIS-Student Ambassador Apr 2023 - Jun 2023
SKILLS: i-CES
SKILLS: Django Mentor
SKILLS: 2023 - Feb 2023
SKILLS: Pokhara Made
SKILLS: familiar
SKILLS: with
SKILLS: Python and Django's MVC
SKILLS: ,
SKILLS: ORM, Templates
SKILLS: Rest Framework
SKILLS: Coding Competition
SKILLS: 2023 Code with Coffee
SKILLS: i-CES) - 2022 PROJECTS LIVER CIRRHOSIS PREDICTION Fusemachines ML Final Project  (Feb 2023- Apr 2023) Worked
SKILLS: classifiers
SKILLS: .
SKILLS: Deployed on Flask
SKILLS: BCT Study Room Software Engineering Project
SKILLS: 2022- Jul 2022
SKILLS: web-app
SKILLS: .
SKILLS: BookSuggestor COURS

**The model now labels almost everything as a skill. We will still train it further on more data and labels and see how it performs. If the final result isn't good, we will train the model directly on annotated resume text instead of on separate, isolated phrases.**

### Train Tenure (e.g. Jan 2023 - Jan 2024)

In [19]:
# Date and date-range strings, in assorted notations, used as TENURE phrases.
date_phrases = [
    "January 2020 - Present", "2018 - 2022", "Dec 2015 - Mar 2018",
    "July 2021 - August 2023", "September 2019 - Nov 2020", "2020 - Present",
    "Oct 2018 - Apr 2020", "May 2016 - September 2019", "June 2019 - Dec 2021",
    "2005 - 2010", "Mar 2017 - Present", "July 2022",
    "Jan 2014 - Jun 2015", "August 2020 - Current", "Fall 2018 - Spring 2022",
    "Q3 2019 - Q1 2021", "Dec '19 - Feb '22", "3/2015 - 5/2018",
    "Apr 2008", "Summer 2016",
]

In [20]:
# Build one TENURE training example per date phrase.
TENURE_DATA = data_generator("TENURE", date_phrases, level)

# Show the first five and the last five examples, then the total count.
for data in TENURE_DATA[:5] + TENURE_DATA[-5:]:
    print(data)
print("TENURE_DATA length", len(TENURE_DATA))

('Computer Vision', {'entities': [(0, 15, 'SKILLS')]})
('Android SDK', {'entities': [(0, 11, 'SKILLS')]})
('Android SDK', {'entities': [(0, 11, 'SKILLS')]})
('Unix', {'entities': [(0, 4, 'SKILLS')]})
('NumPy', {'entities': [(0, 5, 'SKILLS')]})
('macOS', {'entities': [(0, 5, 'SKILLS')]})
('CSS', {'entities': [(0, 3, 'SKILLS')]})
('Docker', {'entities': [(0, 6, 'SKILLS')]})
('Azure', {'entities': [(0, 5, 'SKILLS')]})
('Kubernetes', {'entities': [(0, 10, 'SKILLS')]})
TRAIN_DATA length 136


In [21]:
# Fine-tune the NER component on the TENURE examples.
trainer(TENURE_DATA)

Losses {'ner': 69.92621201228422}
Losses {'ner': 59.01772405664411}
Losses {'ner': 51.14020713698119}
Losses {'ner': 59.6613085282479}
Losses {'ner': 69.73185156157706}
Losses {'ner': 47.085694319714094}
Losses {'ner': 23.607129734009504}
Losses {'ner': 19.894113314076094}
Losses {'ner': 15.208815962891094}
Losses {'ner': 10.083870277058534}


In [22]:
# Same resume again, to see how the TENURE training changed the predictions.
view_results(nlp(resume))

TENURE: Rupesh Ghimire rupeshghimire007@gmail.com  
TENURE: Pashchimanchal Campus, IOE, TU Bachelor in Computer Engineering 2019-Present Fusemachines Micro-Degree in Artificial Intelligence 2023-Present Fellowship Scholar LINKS Github:// rupeshghimire7
TENURE: rupesh-ghimire7 Medium:// rupeshghimire7 LeetCode:// rupeshghimire7 INVOLVEMENTS NAAMI  ANAIS-Student Ambassador Apr 2023 - Jun 2023 i-CES  Django Mentor Jan 2023 - Feb 2023  
TENURE: Pokhara Made
TENURE: Python and Django's MVC
TENURE: ORM, Templates and Rest Framework.
TENURE: Coding Competition (GCES) - 2023 Code with Coffee (i-CES) - 2022 PROJECTS LIVER CIRRHOSIS PREDICTION Fusemachines ML Final Project  (Feb 2023- Apr 2023) Worked on the multiclass classification problem
TENURE: Deployed on Flask
TENURE: BCT Study Room Software Engineering Project  (May 2022- Jul 2022)
TENURE: BookSuggestor COURSEWORKSDBMS Project Leveraged RAW SQL for database management using MySQL in Django bypassing Django's ORM
TENURE: UNDERGRADUATETele

In [24]:
# Course and certification titles, grouped by topic.
# NOTE(review): presumably meant as phrases for the COURSE label — no use of
# this list is visible in this part of the notebook; confirm.
# The first title previously contained the mis-decoded sequence "â„¢"; it is
# restored to the trademark sign "™".
# The run from "Python for Data Science" to "... (CAPM)" under
# "Additional Topics" appears twice; both copies are kept as in the original.
IT_courses = [
    # Machine Learning
    "Machine Learning A-Z™: Hands-On Python & R In Data Science",
    "TensorFlow for Deep Learning",
    "Natural Language Processing in Python",
    "Reinforcement Learning Specialization",
    "Introduction to Deep Learning with PyTorch",

    # Python
    "Advanced Python Programming",
    "Flask Web Development",
    "Python GUI Programming with Tkinter",
    "Django for Beginners",
    "Python for Finance and Algorithmic Trading",

    # JavaScript
    "JavaScript - The Complete Guide 2022 (Beginner + Advanced)",
    "React - The Complete Guide (incl Hooks, React Router, Redux)",
    "Node.js - The Complete Guide",
    "Vue.js - The Complete Guide",
    "D3.js - Data Visualization with JavaScript",

    # Web Development
    "Advanced Web Developer Bootcamp",
    "GraphQL with React: The Complete Developers Guide",
    "MERN Stack - Build and Deploy Full-Stack React-Redux & Node.js",
    "Responsive Web Design Certification",
    "Progressive Web Apps (PWA) - The Complete Guide",

    # Graphics and Web Design
    "Adobe Illustrator CC - Essentials Training",
    "UI/UX Design - Adobe XD to WordPress Development",
    "Web Design for Beginners: Real World Coding in HTML & CSS",
    "Adobe Photoshop CC - Essentials Training Course",
    "Figma: Interactive Prototyping and Design (UI/UX)",

    # Web App Development
    "Full Stack Open 2021 - Deep Dive Into Modern Web Development",
    "Building Scalable APIs with GraphQL",
    "Serverless Functions and APIs using AWS Lambda",
    "Firebase - The Complete Guide",
    "Docker and Kubernetes: The Complete Guide",

    # AI
    "Artificial Intelligence: Business Strategies and Applications",
    "AI for Everyone",
    "Introduction to Deep Learning with Keras",
    "Building AI Powered Chatbots Without Programming",
    "AI and Machine Learning for Business",

    # Database Management
    "MongoDB - The Complete Developer's Guide",
    "SQL - MySQL for Data Analytics and Business Intelligence",
    "Database Design and MySQL",
    "Firebase Firestore for Web Development",
    "Neo4j Graph Databases with Cypher",

    # Data Analytics
    "Data Science and Machine Learning Bootcamp with R",
    "Google Analytics Certification: Become Certified & Earn More",
    "Power BI - A Complete Introduction",
    "Big Data Analytics with Spark",
    "Tableau 2020 A-Z: Hands-On Tableau Training For Data Science!",

    # Security and Ethical Hacking
    "Certified Information Systems Security Professional (CISSP)",
    "Ethical Hacking for Beginners",
    "CompTIA Security+ (SY0-601): Complete Course and Practice Exam",
    "Network Penetration Testing and Ethical Hacking",
    "Web Application Penetration Testing and Bug Bounty Course",

    # Cloud Computing
    "AWS Certified Cloud Practitioner - Full Course",
    "Microsoft Azure - Beginner's Guide + AZ-900",
    "Google Cloud Platform (GCP) - Cloud Architect",
    "Serverless Architecture on AWS",
    "DevOps on AWS: Learn CodeDeploy, AWS Elastic Beanstalk & More",

    # Additional Topics
    "Blockchain Basics: A Non-Technical Introduction",
    "Internet of Things (IoT) - The Complete Guide",
    "Cybersecurity Specialization",
    "Data Engineering, Big Data, and Machine Learning on GCP",
    "Complete Linux Training Course to Get Your Dream IT Job 2022",
    "5G NR (New Radio) - The Complete Course",
    "Introduction to Quantum Computing",
    "Python for Data Science",
    "Java Programming and Software Engineering",
    "Full Stack Web Development",
    "SQL Fundamentals",
    "Certified Ethical Hacker (CEH) Training",
    "AWS Certified Solutions Architect - Associate",
    "Machine Learning with Python",
    "Agile and Scrum Foundations",
    "Project Management Professional (PMP) Certification",
    "Cisco Certified Network Associate (CCNA)",
    "CompTIA A+, Security+, and Network+ Certifications",
    "Microsoft Certified: Azure, Windows, and Office Certifications",
    "Cybersecurity Essentials",
    "Data Science and Big Data Analytics",
    "DevOps Practices and Principles",
    "Network Security Fundamentals",
    "Deep Learning Specialization",
    "Google Cloud Platform (GCP) - Cloud Engineer",
    "HTML and CSS Fundamentals",
    "NoSQL Database Concepts",
    "Kanban for Software Development",
    "Certified Information Systems Security Professional (CISSP) Certification",
    "Project Management Professional (PMP) Certification",
    "Scrum Master Training",
    "Natural Language Processing (NLP)",
    "Wireless Networking Fundamentals",
    "Software-Defined Networking (SDN)",
    "Network Security and Firewall Management",
    "ITIL Foundation",
    "PRINCE2 Practitioner",
    "Certified Associate in Project Management (CAPM)",
    "Python for Data Science",
    "Java Programming and Software Engineering",
    "Full Stack Web Development",
    "SQL Fundamentals",
    "Certified Ethical Hacker (CEH) Training",
    "AWS Certified Solutions Architect - Associate",
    "Machine Learning with Python",
    "Agile and Scrum Foundations",
    "Project Management Professional (PMP) Certification",
    "Cisco Certified Network Associate (CCNA)",
    "CompTIA A+, Security+, and Network+ Certifications",
    "Microsoft Certified: Azure, Windows, and Office Certifications",
    "Cybersecurity Essentials",
    "Data Science and Big Data Analytics",
    "DevOps Practices and Principles",
    "Network Security Fundamentals",
    "Deep Learning Specialization",
    "Google Cloud Platform (GCP) - Cloud Engineer",
    "HTML and CSS Fundamentals",
    "NoSQL Database Concepts",
    "Kanban for Software Development",
    "Certified Information Systems Security Professional (CISSP) Certification",
    "Project Management Professional (PMP) Certification",
    "Scrum Master Training",
    "Natural Language Processing (NLP)",
    "Wireless Networking Fundamentals",
    "Software-Defined Networking (SDN)",
    "Network Security and Firewall Management",
    "ITIL Foundation",
    "PRINCE2 Practitioner",
    "Certified Associate in Project Management (CAPM)",
]

In [25]:
Other_label_data = [
    ('123', {'entities': [(0, 3, 'CARDINAL')]}),
    ('January 1, 2022', {'entities': [(0, 15, 'DATE')]}),
    ('World Cup', {'entities': [(0, 9, 'EVENT')]}),
    ('Eiffel Tower', {'entities': [(0, 12, 'FAC')]}),
    ('New York', {'entities': [(0, 8, 'GPE')]}),
    ('English', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Constitution', {'entities': [(0, 11, 'LAW')]}),
    ('Statue of Liberty', {'entities': [(0, 17, 'LOC')]}),
    ('$100', {'entities': [(0, 4, 'MONEY')]}),
    ('American', {'entities': [(0, 8, 'NORP')]}),
    ('1st', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Microsoft', {'entities': [(0, 9, 'ORG')]}),
    ('25%', {'entities': [(0, 3, 'PERCENT')]}),
    ('John Doe', {'entities': [(0, 8, 'PERSON')]}),
    ('iPhone', {'entities': [(0, 6, 'PRODUCT')]}),
    ('5 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('3:30 PM', {'entities': [(0, 7, 'TIME')]}),
    ('Mona Lisa', {'entities': [(0, 9, 'WORK_OF_ART')]}),
    ('456', {'entities': [(0, 3, 'CARDINAL')]}),
    ('June 15, 2022', {'entities': [(0, 14, 'DATE')]}),
    ('Oscars', {'entities': [(0, 6, 'EVENT')]}),
    ('Pyramids of Giza', {'entities': [(0, 16, 'FAC')]}),
    ('London', {'entities': [(0, 6, 'GPE')]}),
    ('French', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Bill of Rights', {'entities': [(0, 15, 'LAW')]}),
    ('Grand Canyon', {'entities': [(0, 12, 'LOC')]}),
    ('$5000', {'entities': [(0, 5, 'MONEY')]}),
    ('Chinese', {'entities': [(0, 7, 'NORP')]}),
    ('2nd', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Google', {'entities': [(0, 6, 'ORG')]}),
    ('50%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Jane Smith', {'entities': [(0, 10, 'PERSON')]}),
    ('PlayStation', {'entities': [(0, 11, 'PRODUCT')]}),
    ('10 liters', {'entities': [(0, 9, 'QUANTITY')]}),
    ('12:45 AM', {'entities': [(0, 7, 'TIME')]}),
    ('The Starry Night', {'entities': [(0, 16, 'WORK_OF_ART')]}),
    ('789', {'entities': [(0, 3, 'CARDINAL')]}),
    ('August 10, 2023', {'entities': [(0, 15, 'DATE')]}),
    ('World Series', {'entities': [(0, 12, 'EVENT')]}),
    ('The Louvre Museum', {'entities': [(0, 17, 'FAC')]}),
    ('Tokyo', {'entities': [(0, 5, 'GPE')]}),
    ('Spanish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Civil Rights Act', {'entities': [(0, 16, 'LAW')]}),
    ('Mount Everest', {'entities': [(0, 13, 'LOC')]}),
    ('$75.50', {'entities': [(0, 6, 'MONEY')]}),
    ('Indian', {'entities': [(0, 6, 'NORP')]}),
    ('3rd', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Amazon', {'entities': [(0, 6, 'ORG')]}),
    ('75.5%', {'entities': [(0, 5, 'PERCENT')]}),
    ('Mark Johnson', {'entities': [(0, 12, 'PERSON')]}),
    ('MacBook Pro', {'entities': [(0, 11, 'PRODUCT')]}),
    ('5 miles', {'entities': [(0, 7, 'QUANTITY')]}),
    ('4:00 PM', {'entities': [(0, 7, 'TIME')]}),
    ('The Last Supper', {'entities': [(0, 15, 'WORK_OF_ART')]}),
    ('1000', {'entities': [(0, 3, 'CARDINAL')]}),
    ('December 5, 2024', {'entities': [(0, 15, 'DATE')]}),
    ('Super Bowl', {'entities': [(0, 10, 'EVENT')]}),
    ('The Colosseum', {'entities': [(0, 13, 'FAC')]}),
    ('Sydney', {'entities': [(0, 6, 'GPE')]}),
    ('German', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Universal Declaration of Human Rights', {'entities': [(0, 36, 'LAW')]}),
    ('Great Wall of China', {'entities': [(0, 18, 'LOC')]}),
    ('$50000', {'entities': [(0, 6, 'MONEY')]}),
    ('Australian', {'entities': [(0, 9, 'NORP')]}),
    ('4th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('NASA', {'entities': [(0, 4, 'ORG')]}),
    ('15%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Emily Watson', {'entities': [(0, 12, 'PERSON')]}),
    ('PlayStation 5', {'entities': [(0, 13, 'PRODUCT')]}),
    ('20 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('1:30 AM', {'entities': [(0, 7, 'TIME')]}),
    ('Starry Night by Vincent van Gogh', {'entities': [(0, 30, 'WORK_OF_ART')]}),
    ('2000', {'entities': [(0, 3, 'CARDINAL')]}),
    ('March 20, 2025', {'entities': [(0, 14, 'DATE')]}),
    ('Academy Awards', {'entities': [(0, 15, 'EVENT')]}),
    ('Machu Picchu', {'entities': [(0, 12, 'FAC')]}),
    ('Berlin', {'entities': [(0, 6, 'GPE')]}),
    ('Italian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Declaration of Independence', {'entities': [(0, 27, 'LAW')]}),
    ('Niagara Falls', {'entities': [(0, 14, 'LOC')]}),
    ('$100000', {'entities': [(0, 7, 'MONEY')]}),
    ('Japanese', {'entities': [(0, 8, 'NORP')]}),
    ('5th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('SpaceX', {'entities': [(0, 5, 'ORG')]}),
    ('30%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Michael Johnson', {'entities': [(0, 15, 'PERSON')]}),
    ('iPhone 12', {'entities': [(0, 8, 'PRODUCT')]}),
    ('15 miles', {'entities': [(0, 8, 'QUANTITY')]}),
    ('6:45 PM', {'entities': [(0, 7, 'TIME')]}),
    ('Mona Lisa by Leonardo da Vinci', {'entities': [(0, 28, 'WORK_OF_ART')]}),
    ('5000', {'entities': [(0, 3, 'CARDINAL')]}),
    ('October 8, 2023', {'entities': [(0, 15, 'DATE')]}),
    ('World Cup', {'entities': [(0, 9, 'EVENT')]}),
    ('The Great Wall of China', {'entities': [(0, 22, 'FAC')]}),
    ('Paris', {'entities': [(0, 5, 'GPE')]}),
    ('Russian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Human Rights Act', {'entities': [(0, 16, 'LAW')]}),
    ('Mount Kilimanjaro', {'entities': [(0, 17, 'LOC')]}),
    ('$80000', {'entities': [(0, 6, 'MONEY')]}),
    ('Brazilian', {'entities': [(0, 8, 'NORP')]}),
    ('6th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Microsoft', {'entities': [(0, 9, 'ORG')]}),
    ('85%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Emma Watson', {'entities': [(0, 11, 'PERSON')]}),
    ('Xbox Series X', {'entities': [(0, 13, 'PRODUCT')]}),
    ('25 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('2:15 PM', {'entities': [(0, 7, 'TIME')]}),
    ('The Scream by Edvard Munch', {'entities': [(0, 26, 'WORK_OF_ART')]}),
    ('123456', {'entities': [(0, 6, 'CARDINAL')]}),
    ('June 15, 2024', {'entities': [(0, 14, 'DATE')]}),
    ('Olympic Games', {'entities': [(0, 13, 'EVENT')]}),
    ('The Eiffel Tower', {'entities': [(0, 17, 'FAC')]}),
    ('Tokyo', {'entities': [(0, 5, 'GPE')]}),
    ('Chinese', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Bill of Rights', {'entities': [(0, 14, 'LAW')]}),
    ('Grand Canyon', {'entities': [(0, 12, 'LOC')]}),
    ('$120000', {'entities': [(0, 7, 'MONEY')]}),
    ('Mexican', {'entities': [(0, 7, 'NORP')]}),
    ('7th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Google', {'entities': [(0, 6, 'ORG')]}),
    ('10%', {'entities': [(0, 3, 'PERCENT')]}),
    ('John Smith', {'entities': [(0, 10, 'PERSON')]}),
    ('PlayStation 4', {'entities': [(0, 13, 'PRODUCT')]}),
    ('50 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('4:30 AM', {'entities': [(0, 7, 'TIME')]}),
    ('Guernica by Pablo Picasso', {'entities': [(0, 25, 'WORK_OF_ART')]}),
    ('67890', {'entities': [(0, 5, 'CARDINAL')]}),
    ('April 10, 2025', {'entities': [(0, 14, 'DATE')]}),
    ('FIFA World Cup', {'entities': [(0, 15, 'EVENT')]}),
    ('The Louvre Museum', {'entities': [(0, 17, 'FAC')]}),
    ('Rio de Janeiro', {'entities': [(0, 13, 'GPE')]}),
    ('Arabic', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Constitution of the United States', {'entities': [(0, 31, 'LAW')]}),
    ('Mount Everest', {'entities': [(0, 12, 'LOC')]}),
    ('$150000', {'entities': [(0, 7, 'MONEY')]}),
    ('Canadian', {'entities': [(0, 8, 'NORP')]}),
    ('8th', {'entities': [(0, 3, 'ORDINAL')]}),
    ('Amazon', {'entities': [(0, 6, 'ORG')]}),
    ('15%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Jane Doe', {'entities': [(0, 8, 'PERSON')]}),
    ('Nintendo Switch', {'entities': [(0, 16, 'PRODUCT')]}),
    ('75 kilograms', {'entities': [(0, 12, 'QUANTITY')]}),
    ('9:45 PM', {'entities': [(0, 7, 'TIME')]}),
    ('The Persistence of Memory by Salvador Dali', {'entities': [(0, 40, 'WORK_OF_ART')]}),
    ('987654', {'entities': [(0, 6, 'CARDINAL')]}),
    ('September 5, 2026', {'entities': [(0, 16, 'DATE')]}),
    ('Super Bowl', {'entities': [(0, 10, 'EVENT')]}),
    ('The Colosseum', {'entities': [(0, 13, 'FAC')]}),
    ('Sydney', {'entities': [(0, 6, 'GPE')]}),
    ('Japanese', {'entities': [(0, 8, 'LANGUAGE')]}),
    ('European Convention on Human Rights', {'entities': [(0, 36, 'LAW')]}),
    ('Machu Picchu', {'entities': [(0, 12, 'LOC')]}),
    ('$200000', {'entities': [(0, 7, 'MONEY')]}),
    ('Australian', {'entities': [(0, 9, 'NORP')]}),
    ('10th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('Tesla', {'entities': [(0, 5, 'ORG')]}),
    ('20%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Michael Johnson', {'entities': [(0, 15, 'PERSON')]}),
    ('Sony PlayStation 5', {'entities': [(0, 20, 'PRODUCT')]}),
    ('100 kilograms', {'entities': [(0, 13, 'QUANTITY')]}),
    ('3:15 AM', {'entities': [(0, 7, 'TIME')]}),
    ('Starry Night by Vincent van Gogh', {'entities': [(0, 31, 'WORK_OF_ART')]}),
    ('876543', {'entities': [(0, 6, 'CARDINAL')]}),
    ('November 20, 2027', {'entities': [(0, 17, 'DATE')]}),
    ('World Series', {'entities': [(0, 12, 'EVENT')]}),
    ('Statue of Liberty', {'entities': [(0, 18, 'FAC')]}),
    ('London', {'entities': [(0, 6, 'GPE')]}),
    ('Korean', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Geneva Conventions', {'entities': [(0, 18, 'LAW')]}),
    ('Great Barrier Reef', {'entities': [(0, 18, 'LOC')]}),
    ('$250000', {'entities': [(0, 7, 'MONEY')]}),
    ('Brazilian', {'entities': [(0, 8, 'NORP')]}),
    ('12th', {'entities': [(0, 4, 'ORDINAL')]}),
    ('Microsoft', {'entities': [(0, 9, 'ORG')]}),
    ('25%', {'entities': [(0, 3, 'PERCENT')]}),
    ('Michelle Obama', {'entities': [(0, 14, 'PERSON')]}),
    ('Nintendo 3DS', {'entities': [(0, 14, 'PRODUCT')]}),
    ('150 kilograms', {'entities': [(0, 13, 'QUANTITY')]}),
    ('6:45 PM', {'entities': [(0, 7, 'TIME')]}),
    ('The Last Supper by Leonardo da Vinci', {'entities': [(0, 35, 'WORK_OF_ART')]}),
    
    # CARDINAL
    # Each example is a single entity spanning the whole text; the end offset
    # is exclusive and therefore equals len(text).
    ('One', {'entities': [(0, 3, 'CARDINAL')]}),
    ('25', {'entities': [(0, 2, 'CARDINAL')]}),
    ('Three hundred', {'entities': [(0, 13, 'CARDINAL')]}),
    ('50', {'entities': [(0, 2, 'CARDINAL')]}),
    ('Seventy-two', {'entities': [(0, 11, 'CARDINAL')]}),
    ('15', {'entities': [(0, 2, 'CARDINAL')]}),
    ('A hundred and ten', {'entities': [(0, 17, 'CARDINAL')]}),
    ('5,000', {'entities': [(0, 5, 'CARDINAL')]}),
    ('Fifty-nine', {'entities': [(0, 10, 'CARDINAL')]}),
    ('2', {'entities': [(0, 1, 'CARDINAL')]}),
    ('Sixty-seven', {'entities': [(0, 11, 'CARDINAL')]}),
    ('Eighty', {'entities': [(0, 6, 'CARDINAL')]}),
    ('Nine', {'entities': [(0, 4, 'CARDINAL')]}),
    ('20', {'entities': [(0, 2, 'CARDINAL')]}),
    ('Two hundred and fifty', {'entities': [(0, 21, 'CARDINAL')]}),
    ('10,000', {'entities': [(0, 6, 'CARDINAL')]}),
    ('Six', {'entities': [(0, 3, 'CARDINAL')]}),
    ('Thirty', {'entities': [(0, 6, 'CARDINAL')]}),
    ('3.5', {'entities': [(0, 3, 'CARDINAL')]}),
    ('Eleven', {'entities': [(0, 6, 'CARDINAL')]}),
    ('Four', {'entities': [(0, 4, 'CARDINAL')]}),
    ('45', {'entities': [(0, 2, 'CARDINAL')]}),
    ('Eight', {'entities': [(0, 5, 'CARDINAL')]}),
    ('Seventeen', {'entities': [(0, 9, 'CARDINAL')]}),
    
    # DATE
    # Each example is a single entity spanning the whole text; the end offset
    # is exclusive and therefore equals len(text).
    ('January 1, 2022', {'entities': [(0, 15, 'DATE')]}),
    ('07/15/1995', {'entities': [(0, 10, 'DATE')]}),
    ('22nd November 2010', {'entities': [(0, 18, 'DATE')]}),
    ('03-05-1988', {'entities': [(0, 10, 'DATE')]}),
    ('June 10th, 2023', {'entities': [(0, 15, 'DATE')]}),
    ('12/31/2000', {'entities': [(0, 10, 'DATE')]}),
    ('Aug 5, 1972', {'entities': [(0, 11, 'DATE')]}),
    ('May 15, 2015', {'entities': [(0, 12, 'DATE')]}),
    ('03/07/1986', {'entities': [(0, 10, 'DATE')]}),
    ('September 20, 1998', {'entities': [(0, 18, 'DATE')]}),
    ('25th December, 2025', {'entities': [(0, 19, 'DATE')]}),
    ('10/05/2010', {'entities': [(0, 10, 'DATE')]}),
    ('July 3, 2019', {'entities': [(0, 12, 'DATE')]}),
    ('04-18-2005', {'entities': [(0, 10, 'DATE')]}),
    ('June 30th, 2022', {'entities': [(0, 15, 'DATE')]}),
    ('09/08/1980', {'entities': [(0, 10, 'DATE')]}),
    ('22nd October, 2017', {'entities': [(0, 18, 'DATE')]}),
    ('11-25-1996', {'entities': [(0, 10, 'DATE')]}),
    ('March 12, 2008', {'entities': [(0, 14, 'DATE')]}),
    ('08/22/2014', {'entities': [(0, 10, 'DATE')]}),
    ('14th February, 1992', {'entities': [(0, 19, 'DATE')]}),
    ('05-10-2016', {'entities': [(0, 10, 'DATE')]}),
    ('April 5, 1982', {'entities': [(0, 13, 'DATE')]}),
    ('12-07-2011', {'entities': [(0, 10, 'DATE')]}),
    ('November 8th, 2007', {'entities': [(0, 18, 'DATE')]}),
    
    # EVENT
    # Each example is a single entity spanning the whole text; the end offset
    # is exclusive and therefore equals len(text).
    ('Super Bowl', {'entities': [(0, 10, 'EVENT')]}),
    ('Woodstock Festival', {'entities': [(0, 18, 'EVENT')]}),
    ('Academic Conference', {'entities': [(0, 19, 'EVENT')]}),
    ('Tech Summit', {'entities': [(0, 11, 'EVENT')]}),
    ('Oscar Awards', {'entities': [(0, 12, 'EVENT')]}),
    ('TEDx Talk', {'entities': [(0, 9, 'EVENT')]}),
    ('Product Launch', {'entities': [(0, 14, 'EVENT')]}),
    ('Book Signing', {'entities': [(0, 12, 'EVENT')]}),
    ('FIFA World Cup', {'entities': [(0, 14, 'EVENT')]}),
    ('Art Exhibition', {'entities': [(0, 14, 'EVENT')]}),
    ('Halloween Party', {'entities': [(0, 15, 'EVENT')]}),
    ('Comic-Con', {'entities': [(0, 9, 'EVENT')]}),
    ('Election Day', {'entities': [(0, 12, 'EVENT')]}),
    ('Startup Pitch', {'entities': [(0, 13, 'EVENT')]}),
    ('Graduation Ceremony', {'entities': [(0, 19, 'EVENT')]}),
    ('International Film Festival', {'entities': [(0, 27, 'EVENT')]}),
    ('Live Concert', {'entities': [(0, 12, 'EVENT')]}),
    ('Fashion Show', {'entities': [(0, 12, 'EVENT')]}),
    ('Comic Book Convention', {'entities': [(0, 21, 'EVENT')]}),
    ('New Year\'s Eve Party', {'entities': [(0, 20, 'EVENT')]}),
    ('SpaceX Launch', {'entities': [(0, 13, 'EVENT')]}),
    ('Independence Day Parade', {'entities': [(0, 23, 'EVENT')]}),
    ('Tech Workshop', {'entities': [(0, 13, 'EVENT')]}),
    ('Music Festival', {'entities': [(0, 14, 'EVENT')]}),
    ('Scientific Symposium', {'entities': [(0, 20, 'EVENT')]}),
    
    # FAC
    # Each example is a single entity spanning the whole text; the end offset
    # is exclusive and therefore equals len(text).
    ('Eiffel Tower', {'entities': [(0, 12, 'FAC')]}),
    ('White House', {'entities': [(0, 11, 'FAC')]}),
    ('Taj Mahal', {'entities': [(0, 9, 'FAC')]}),
    ('Louvre Museum', {'entities': [(0, 13, 'FAC')]}),
    ('Sydney Opera House', {'entities': [(0, 18, 'FAC')]}),
    ('Great Wall of China', {'entities': [(0, 19, 'FAC')]}),
    ('Statue of Liberty', {'entities': [(0, 17, 'FAC')]}),
    ('Colosseum', {'entities': [(0, 9, 'FAC')]}),
    ('Burj Khalifa', {'entities': [(0, 12, 'FAC')]}),
    ('Machu Picchu', {'entities': [(0, 12, 'FAC')]}),
    ('Grand Canyon', {'entities': [(0, 12, 'FAC')]}),
    ('Stonehenge', {'entities': [(0, 10, 'FAC')]}),
    ('The Vatican', {'entities': [(0, 11, 'FAC')]}),
    ('Mount Rushmore', {'entities': [(0, 14, 'FAC')]}),
    ('Golden Gate Bridge', {'entities': [(0, 18, 'FAC')]}),
    ('Sagrada Familia', {'entities': [(0, 15, 'FAC')]}),
    ('Petronas Towers', {'entities': [(0, 15, 'FAC')]}),
    ('Neuschwanstein Castle', {'entities': [(0, 21, 'FAC')]}),
    ('Red Square', {'entities': [(0, 10, 'FAC')]}),
    ('Disneyland', {'entities': [(0, 10, 'FAC')]}),
    ('The Acropolis', {'entities': [(0, 13, 'FAC')]}),
    ('The Shard', {'entities': [(0, 9, 'FAC')]}),
    ('Mount Everest Base Camp', {'entities': [(0, 23, 'FAC')]}),
    ('The Guggenheim Museum', {'entities': [(0, 21, 'FAC')]}),
    
    # GPE
    # Each example is a single entity spanning the whole text; the end offset
    # is exclusive and therefore equals len(text).
    ('New York', {'entities': [(0, 8, 'GPE')]}),
    ('Paris', {'entities': [(0, 5, 'GPE')]}),
    ('Tokyo', {'entities': [(0, 5, 'GPE')]}),
    ('London', {'entities': [(0, 6, 'GPE')]}),
    ('Beijing', {'entities': [(0, 7, 'GPE')]}),
    ('Berlin', {'entities': [(0, 6, 'GPE')]}),
    ('Cairo', {'entities': [(0, 5, 'GPE')]}),
    ('Rio de Janeiro', {'entities': [(0, 14, 'GPE')]}),
    ('Sydney', {'entities': [(0, 6, 'GPE')]}),
    ('Mumbai', {'entities': [(0, 6, 'GPE')]}),
    ('Moscow', {'entities': [(0, 6, 'GPE')]}),
    ('Bangkok', {'entities': [(0, 7, 'GPE')]}),
    ('Toronto', {'entities': [(0, 7, 'GPE')]}),
    ('Dubai', {'entities': [(0, 5, 'GPE')]}),
    ('Johannesburg', {'entities': [(0, 12, 'GPE')]}),
    ('Rome', {'entities': [(0, 4, 'GPE')]}),
    ('Cape Town', {'entities': [(0, 9, 'GPE')]}),
    ('Lisbon', {'entities': [(0, 6, 'GPE')]}),
    ('Buenos Aires', {'entities': [(0, 12, 'GPE')]}),
    ('Hanoi', {'entities': [(0, 5, 'GPE')]}),
    ('Montreal', {'entities': [(0, 8, 'GPE')]}),
    ('Manila', {'entities': [(0, 6, 'GPE')]}),
    ('Athens', {'entities': [(0, 6, 'GPE')]}),
    ('Stockholm', {'entities': [(0, 9, 'GPE')]}),
    ('Kathmandu', {'entities': [(0, 9, 'GPE')]}),
    ('Pokhara', {'entities': [(0, 7, 'GPE')]}),
    ('Bhaktapur', {'entities': [(0, 9, 'GPE')]}),
    ('Lalitpur', {'entities': [(0, 8, 'GPE')]}),
    ('Biratnagar', {'entities': [(0, 10, 'GPE')]}),
    ('Butwal', {'entities': [(0, 6, 'GPE')]}),
    ('Dharan', {'entities': [(0, 6, 'GPE')]}),
    ('Dhangadhi', {'entities': [(0, 9, 'GPE')]}),
    ('Bharatpur', {'entities': [(0, 9, 'GPE')]}),
    ('Hetauda', {'entities': [(0, 7, 'GPE')]}),
    ('Janakpur', {'entities': [(0, 8, 'GPE')]}),
    ('Birendranagar', {'entities': [(0, 13, 'GPE')]}),
    ('Birgunj', {'entities': [(0, 7, 'GPE')]}),
    ('Nepalgunj', {'entities': [(0, 9, 'GPE')]}),
    ('Tansen', {'entities': [(0, 6, 'GPE')]}),
    ('Gorkha', {'entities': [(0, 6, 'GPE')]}),
    ('Ilam', {'entities': [(0, 4, 'GPE')]}),
    ('Jiri', {'entities': [(0, 4, 'GPE')]}),
    ('Dolakha', {'entities': [(0, 7, 'GPE')]}),
    ('Rolpa', {'entities': [(0, 5, 'GPE')]}),
    ('Solukhumbu', {'entities': [(0, 10, 'GPE')]}),
    ('Kanchanpur', {'entities': [(0, 10, 'GPE')]}),
    ('Rukum', {'entities': [(0, 5, 'GPE')]}),
    ('Bardiya', {'entities': [(0, 7, 'GPE')]}),
    ('Mustang', {'entities': [(0, 7, 'GPE')]}),
    
    # LANGUAGE
    # Each example is a single entity spanning the whole text; the end offset
    # is exclusive and therefore equals len(text).
    ('NEPALI', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('English', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Spanish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Chinese', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Hindi', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Arabic', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('French', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Portuguese', {'entities': [(0, 10, 'LANGUAGE')]}),
    ('Russian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Japanese', {'entities': [(0, 8, 'LANGUAGE')]}),
    ('German', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Italian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Korean', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Dutch', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Swedish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Greek', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Hebrew', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Polish', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Turkish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Vietnamese', {'entities': [(0, 10, 'LANGUAGE')]}),
    ('Thai', {'entities': [(0, 4, 'LANGUAGE')]}),
    ('Swahili', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Finnish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Tagalog', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Norwegian', {'entities': [(0, 9, 'LANGUAGE')]}),
    # NOTE(review): the entries below repeat 'English' .. 'Norwegian' from
    # above verbatim. Kept as-is in case the repetition is deliberate
    # oversampling -- confirm, or drop the duplicates.
    ('English', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Spanish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Chinese', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Hindi', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Arabic', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('French', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Portuguese', {'entities': [(0, 10, 'LANGUAGE')]}),
    ('Russian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Japanese', {'entities': [(0, 8, 'LANGUAGE')]}),
    ('German', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Italian', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Korean', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Dutch', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Swedish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Greek', {'entities': [(0, 5, 'LANGUAGE')]}),
    ('Hebrew', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Polish', {'entities': [(0, 6, 'LANGUAGE')]}),
    ('Turkish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Vietnamese', {'entities': [(0, 10, 'LANGUAGE')]}),
    ('Thai', {'entities': [(0, 4, 'LANGUAGE')]}),
    ('Swahili', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Finnish', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Tagalog', {'entities': [(0, 7, 'LANGUAGE')]}),
    ('Norwegian', {'entities': [(0, 9, 'LANGUAGE')]}),
    
    # 
]