In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")
text = "Why Apple is looking at buying U.K. startup for $1 billion ?"
doc = nlp(text)

for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)


Apple 4 9 ORG
U.K. 31 35 GPE
$1 billion 48 58 MONEY


In [2]:
nlp = spacy.load("en_core_web_sm")
doc = nlp("San Francisco considers banning sidewalk delivery robots")

# document level
ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
print(ents)

# token level
ent_san = [doc[0].text, doc[0].ent_iob_, doc[0].ent_type_]
ent_francisco = [doc[1].text, doc[1].ent_iob_, doc[1].ent_type_]
print(ent_san)  # ['San', 'B', 'GPE']
print(ent_francisco)  # ['Francisco', 'I', 'GPE']

[('San Francisco', 0, 13, 'GPE')]
['San', 'B', 'GPE']
['Francisco', 'I', 'GPE']


In [None]:
from spacy import displacy

text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."

nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
displacy.serve(doc, style="ent")




Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...



In [None]:
import nltk

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger_eng')  
nltk.download('maxent_ne_chunker')
nltk.download('maxent_ne_chunker_tab')   # NEW
nltk.download('words')

 
# Step Two: Load Data
 
sentence = "WASHINGTON -- In the wake of a string of abuses by New York police officers in the 1990s, Loretta E. Lynch, the top federal prosecutor in Brooklyn, spoke forcefully about the pain of a broken trust that African-Americans felt and said the responsibility for repairing generations of miscommunication and mistrust fell to law enforcement."

# Step Three: Tokenise, find parts of speech and chunk words 

for sent in nltk.sent_tokenize(sentence):
  for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent))):
     if hasattr(chunk, 'label'):
        print(chunk.label(), ' '.join(c[0] for c in chunk))

In [None]:
import spacy
import spacy.cli 

# Download and load the large model
spacy.cli.download("en_core_web_lg")
sp_lg = spacy.load("en_core_web_lg")

def spacy_large_ner(document: str):
    return {(ent.text.strip(), ent.label_) for ent in sp_lg(document).ents}

# Example usage
example_document = "Apple is looking at buying a U.K. startup for $1 billion"
print(spacy_large_ner(example_document))
