In [1]:
# Predict linguistic attributes: PoS tags, syntactic dependencies, named entities
# Use pre-trained model package en_core_web_sm

import spacy

In [3]:
# Load the installed pipeline package by its name; the returned object is bound
# to `nlp` and is called on raw text in the cells below to build each `doc`.
nlp = spacy.load('en_core_web_sm')
# Bare expression: shows the repr of the loaded pipeline as the cell output.
nlp

<spacy.lang.en.English at 0x230aae83250>

In [4]:
# Each token exposes its predicted part-of-speech tag through the `pos_` attribute

doc = nlp('Joey ate the pizza all by himself.')
for tok in doc:
    # One line per token: "<text> <PoS tag>"
    print(f'{tok.text} {tok.pos_}')

Joey PROPN
ate VERB
the DET
pizza NOUN
all ADV
by ADP
himself PRON
. PUNCT


In [5]:
# The predicted dependency label is indicated by dep_ attribute

# nsubj: nominal subject
# dobj: direct object
# det: determiner
# advmod: adverbial modifier

# One line per token: "<text> <dependency label>"
for tok in doc:
    print(f'{tok.text} {tok.dep_}')

Joey nsubj
ate ROOT
the det
pizza dobj
all advmod
by prep
himself pobj
. punct


In [6]:
# `head` is the parent token a word attaches to in the dependency parse;
# print each token next to the text of its head
for tok in doc:
    print(f'{tok.text} {tok.head.text}')

Joey ate
ate ate
the pizza
pizza ate
all by
by ate
himself by
. ate


In [8]:
# Named entities predicted for the document
# `doc.ents` holds the entity spans (runs of tokens from the doc);
# each span exposes its surface text and its entity label via `label_`

for ent in doc.ents:
    print(f'{ent.text} {ent.label_}')

Joey PERSON


In [9]:
# Re-run the pipeline on a sentence containing an organisation, a place and a money amount
doc = nlp('Apple is looking at buying U.K. startup for $1 billion')
for ent in doc.ents:
    # One line per entity: "<text> <label>"
    print(f'{ent.text} {ent.label_}')

Apple ORG
U.K. GPE
$1 billion MONEY


In [15]:
# Sample headline; the resulting `doc` is inspected token-by-token and
# entity-by-entity in the following cells.
text = 'It’s official: Apple is the first U.S. public company to reach a $1 trillion market value'
doc = nlp(text)
# Bare expression: echoes the processed document as the cell output.
doc

It’s official: Apple is the first U.S. public company to reach a $1 trillion market value

In [29]:
# Fixed-width table, one row per token: text (12), PoS tag (10),
# dependency label (10) and head text (12), all left-aligned
for tok in doc:
    row = (
        f'{tok.text:<12}'
        f'{tok.pos_:<10}'
        f'{tok.dep_:<10}'
        f'{tok.head.text:<12}'
    )
    print(row)

It          PRON      nsubj     ’s          
’s          VERB      ccomp     is          
official    ADJ       dobj      ’s          
:           PUNCT     punct     is          
Apple       PROPN     nsubj     is          
is          AUX       ROOT      is          
the         DET       det       company     
first       ADJ       amod      company     
U.S.        PROPN     nmod      company     
public      ADJ       amod      company     
company     NOUN      attr      is          
to          PART      aux       reach       
reach       VERB      relcl     company     
a           DET       det       value       
$           SYM       quantmod  trillion    
1           NUM       compound  trillion    
trillion    NUM       nummod    value       
market      NOUN      compound  value       
value       NOUN      dobj      reach       


In [28]:
# Entity spans predicted for the current doc
entities = doc.ents

# One line per entity: "<text> <label>"
for ent in entities:
    print(f'{ent.text} {ent.label_}')

Apple ORG
first ORDINAL
U.S. GPE
$1 trillion MONEY


In [34]:
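# The model fails to recognize iPhone X as an entity of its own: in the output below
# it is absorbed into the span 'Upcoming iPhone X', which is mislabelled as PERSON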
text = 'Upcoming iPhone X release date leaked as Apple reveals pre-order'

doc = nlp(text)

# One line per predicted entity: "<text> <label>"
for ent in doc.ents:
    print(f'{ent.text} {ent.label_}')

# Slice tokens 1-2 out of the doc by hand to show the span that was expected
print('\nMissing entity: ', doc[1:3])

Upcoming iPhone X PERSON
Apple ORG

Missing entity:  iPhone X


In [10]:
# Look up the description of the entity label 'GPE' seen in the entity outputs above
spacy.explain('GPE')

'Countries, cities, states'

In [12]:
# Look up the description of the dependency label 'prep' seen in the dep_ output above
spacy.explain('prep')

'prepositional modifier'

In [13]:
# Look up the description of the dependency label 'pobj' seen in the dep_ output above
spacy.explain('pobj')

'object of preposition'

In [14]:
# Look up the description of 'NNP'. NOTE(review): this label is not printed by any
# cell above; presumably a fine-grained tag (token.tag_) rather than a pos_ value, to be confirmed
spacy.explain('NNP')

'noun, proper singular'