In [1]:
import spacy

# Load the pretrained `en_core_web_sm` pipeline; `nlp` is the callable used to process text in the cells below.
nlp = spacy.load('en_core_web_sm')

In [5]:
# Names of the components in the loaded pipeline (includes 'ner', which is used below).
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [18]:
doc = nlp('Tesla Inc is going to acquire Twitter Inc for $44 billion')

# One line per detected entity: text | label | description of that label.
for ent in doc.ents:
    label = ent.label_
    print(ent, ' | ', label, ' | ', spacy.explain(label))

Tesla Inc  |  ORG  |  Companies, agencies, institutions, etc.
Twitter Inc  |  ORG  |  Companies, agencies, institutions, etc.
$44 billion  |  MONEY  |  Monetary values, including unit


In [19]:
# The detected entities are available on the document as a tuple.
doc.ents

(Tesla Inc, Twitter Inc, $44 billion)

In [20]:
from spacy import displacy

# Visualize the named entities of `doc` with displaCy's 'ent' style.
displacy.render(doc, style='ent')

In [26]:
nlp.pipe_labels['ner']  # entity labels supported by this pipeline's NER component

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [39]:
doc2 = nlp('Michael Bloomberg founded Bloomberg Inc in 1982')

# One line per detected entity: text | label | description of that label.
for ent in doc2.ents:
    label = ent.label_
    print(ent, ' | ', label, ' | ', spacy.explain(label))

Michael Bloomberg  |  PERSON  |  People, including fictional
Bloomberg Inc  |  ORG  |  Companies, agencies, institutions, etc.
1982  |  DATE  |  Absolute or relative dates or periods


In [44]:
# Building customizable entities
# In this shorter sentence the model tags "Twitter" as PRODUCT (see the output below).

doc3 = nlp("Tesla is going to acquire Twitter for $45 billion.")

for ent in doc3.ents:
    label = ent.label_
    print(ent, ' | ', label)

Tesla  |  ORG
Twitter  |  PRODUCT
$45 billion  |  MONEY


In [48]:
# Indexing a Doc with a single integer yields a Token.
first_token = doc3[0]
first_token, type(first_token)

(Tesla, spacy.tokens.token.Token)

In [50]:
# Slicing a Doc yields a Span.
phrase = doc3[2:5]
phrase, type(phrase)

(going to acquire, spacy.tokens.span.Span)

In [45]:
# Each item of doc3.ents is a Span. Inspect one directly instead of relying on
# the `ent` loop variable left over from an earlier cell, so this cell does not
# depend on hidden kernel state or on cell execution order.
type(doc3.ents[0])

spacy.tokens.span.Span

In [53]:
from spacy.tokens import Span

# Build single-token spans labelled 'ORG' over token 0 ("Tesla") and token 5 ("Twitter").
s1 = Span(doc3, 0, 1, label = 'ORG')
s2 = Span(doc3, 5, 6, label = 'ORG')

# default='unmodified' keeps the entities not listed here (the MONEY span is still
# present in the next cell's output).
doc3.set_ents([s1,s2], default = 'unmodified')   # Changes the entity type for Twitter from 'PRODUCT' to 'ORG'

In [54]:
# List the entities again to confirm the label override took effect.
for ent in doc3.ents:
    label = ent.label_
    print(ent, ' | ', label)

Tesla  |  ORG
Twitter  |  ORG
$45 billion  |  MONEY
