In [None]:
!pip install -U spacy

In [None]:
!python3 -m spacy download en_core_web_sm

In [2]:
# Sample sentences fed to the NER pipelines in the cells below.
query1 = "Michael Collins was the Command Module Pilot for the Apollo 11 mission in 1969"
query2 = "The Government Code and Cypher School was located at Bletchley Park"

In [1]:
import spacy

# Reference:
# https://github.com/codebasics/nlp-tutorials/blob/main/8_NER/nlp_tutorial_NER.ipynb
#
# List of all entities: https://spacy.io/models/en or
# >>> nlp.pipe_labels['ner']

# Load the small English pipeline downloaded above.
nlp = spacy.load("en_core_web_sm")

# Last expression of the cell: display the names of the pipeline components.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [4]:
# Run the pipeline bound to `nlp` over the first sample sentence.
doc = nlp(query1)

# doc.ents holds the entity spans that were detected (printed as a tuple).
print(doc.ents)

(Michael Collins, the Command Module Pilot, Apollo, 1969)


In [7]:
# Show every detected entity with its label and spaCy's description of that label.
for ent in doc.ents:
    label = ent.label_
    print(ent.text, " | ", label, " | ", spacy.explain(label))
    # Also show the Python type of the label value.
    print(type(label))

Michael Collins  |  PERSON  |  People, including fictional
<class 'str'>
the Command Module Pilot  |  ORG  |  Companies, agencies, institutions, etc.
<class 'str'>
Apollo  |  ORG  |  Companies, agencies, institutions, etc.
<class 'str'>
1969  |  DATE  |  Absolute or relative dates or periods
<class 'str'>


In [6]:
# Inspect the Python type of a single entity taken from doc.ents.
first_entity = doc.ents[0]
print(type(first_entity))

<class 'spacy.tokens.span.Span'>


In [16]:
# Converting an entity to str gives its surface text.
first_entity_text = str(doc.ents[0])
print(first_entity_text)

Michael Collins


In [9]:
# Run the pipeline bound to `nlp` over the second sample sentence.
# NOTE(review): the saved output of this cell lists "Turing", which does not
# occur in query2 -- the output looks stale; re-run the cell to refresh it.
doc = nlp(query2)
print(doc.ents)

# One line per entity: text, label, and spaCy's description of the label.
for ent in doc.ents:
    label = ent.label_
    print(ent.text, " | ", label, " | ", spacy.explain(label))

(Turing,)
Turing  |  ORG  |  Companies, agencies, institutions, etc.


In [None]:
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Tokenizer and token-classification model are loaded from the same checkpoint.
checkpoint = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTokenClassification.from_pretrained(checkpoint)

# NOTE(review): this rebinds `nlp`, which the cells above bound to the spaCy
# pipeline; re-running those cells after this one would use this pipeline instead.
nlp = pipeline("ner", model=model, tokenizer=tokenizer)

In [None]:
!pip install stanza

In [1]:
import stanza

# Getting-started guide: https://stanfordnlp.github.io/stanza/getting_started.html

# Fetch the default English model package.
stanza.download('en')

  from .autonotebook import tqdm as notebook_tqdm
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.10.0.json: 426kB [00:00, 3.26MB/s]                    
2025-04-15 02:10:47 INFO: Downloaded file to /Users/kseniia/stanza_resources/resources.json
2025-04-15 02:10:47 INFO: Downloading default packages for language: en (English) ...
2025-04-15 02:10:49 INFO: File exists: /Users/kseniia/stanza_resources/en/default.zip
2025-04-15 02:10:51 INFO: Finished downloading models and saved to /Users/kseniia/stanza_resources


In [2]:
# Build the English Stanza pipeline with its default processors.
# NOTE(review): reuses the name `nlp` that earlier cells bound to other pipelines.
nlp = stanza.Pipeline(lang='en')

2025-04-15 02:10:51 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.10.0.json: 426kB [00:00, 3.08MB/s]                    
2025-04-15 02:10:52 INFO: Downloaded file to /Users/kseniia/stanza_resources/resources.json
2025-04-15 02:10:53 INFO: Loading these models for language: en (English):
| Processor    | Package                   |
--------------------------------------------
| tokenize     | combined                  |
| mwt          | combined                  |
| pos          | combined_charlm           |
| lemma        | combined_nocharlm         |
| constituency | ptb3-revised_charlm       |
| depparse     | combined_charlm           |
| sentiment    | sstplus_charlm            |
| ner          | ontonotes-ww-multi_charlm |

2025-04-15 02:10:53 I

In [3]:
# Run the pipeline bound to `nlp` over a longer sample sentence.
doc = nlp("During World War II, Turing worked for the Government Code and Cypher School at Bletchley Park, Britain's codebreaking centre that produced Ultra intelligence")

# One line per entity: surface text followed by its type.
for entity in doc.ents:
    print(entity.text, entity.type)

# Full entity list as printed by the library.
print(doc.ents)

World War II EVENT
Turing PERSON
the Government Code ORG
Cypher School ORG
Bletchley Park FAC
Britain's GPE
Ultra ORG
[{
  "text": "World War II",
  "type": "EVENT",
  "start_char": 7,
  "end_char": 19
}, {
  "text": "Turing",
  "type": "PERSON",
  "start_char": 21,
  "end_char": 27
}, {
  "text": "the Government Code",
  "type": "ORG",
  "start_char": 39,
  "end_char": 58
}, {
  "text": "Cypher School",
  "type": "ORG",
  "start_char": 63,
  "end_char": 76
}, {
  "text": "Bletchley Park",
  "type": "FAC",
  "start_char": 80,
  "end_char": 94
}, {
  "text": "Britain's",
  "type": "GPE",
  "start_char": 96,
  "end_char": 105
}, {
  "text": "Ultra",
  "type": "ORG",
  "start_char": 140,
  "end_char": 145
}]


In [4]:
# Print every word of every sentence with its lemma and upos value,
# with tab characters between the fields.
all_words = (word for sent in doc.sentences for word in sent.words)
for word in all_words:
    print(word.text, '\t', word.lemma, '\t', word.upos)

During 	 during 	 ADP
World 	 World 	 PROPN
War 	 War 	 PROPN
II 	 II 	 NUM
, 	 , 	 PUNCT
Turing 	 Turing 	 PROPN
worked 	 work 	 VERB
for 	 for 	 ADP
the 	 the 	 DET
Government 	 Government 	 PROPN
Code 	 Code 	 PROPN
and 	 and 	 CCONJ
Cypher 	 Cypher 	 PROPN
School 	 School 	 PROPN
at 	 at 	 ADP
Bletchley 	 Bletchley 	 PROPN
Park 	 Park 	 PROPN
, 	 , 	 PUNCT
Britain 	 Britain 	 PROPN
's 	 's 	 PART
codebreaking 	 codebreaking 	 ADJ
centre 	 centre 	 NOUN
that 	 that 	 PRON
produced 	 produce 	 VERB
Ultra 	 ultra 	 ADJ
intelligence 	 intelligence 	 NOUN


In [1]:
!pip install flair



In [27]:
from flair.data import Sentence
from flair.nn import Classifier

# Load flair's 'ner' classifier; the saved output reports it as a SequenceTagger.
tagger = Classifier.load('ner')

2025-04-15 03:06:50,561 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>


In [19]:
# Sample sentences for the NER comparison (query1 and query2 repeat the
# definitions from the top of the notebook).
query1 = "Michael Collins was the Command Module Pilot for the Apollo 11 mission in 1969"
query2 = "The Government Code and Cypher School was located at Bletchley Park"
query3 = "During World War II, Turing worked for the Government Code and Cypher School at Bletchley Park, Britain's codebreaking centre that produced Ultra intelligence"

# query4 and query5 cover a science process and a scientist.
query4 = "Photosynthesis is a process that converts light energy into chemical energy stored in glucose molecules"
query5 = "Albert Einstein developed the theory of relativity, which revolutionized modern physics"

In [26]:
# Wrap the first sample sentence in a flair Sentence object.
sentence = Sentence(query1)
# Run the tagger on it; the printed sentence below includes the predicted entity spans.
tagger.predict(sentence)
print(sentence)
# Show the Python type of the sentence object.
print(type(sentence))

Sentence[14]: "Michael Collins was the Command Module Pilot for the Apollo 11 mission in 1969" → ["Michael Collins"/PER, "Apollo 11"/MISC]
<class 'flair.data.Sentence'>
