In [4]:
# Lemmatization: print every token's surface text next to its lemma
import spacy
nlp = spacy.load('en')
doc = nlp(u'this product integrates both libraries for downloading and applying patches')
for tok in doc:
    # One "<text> <lemma>" pair per line
    print(f'{tok.text} {tok.lemma_}')

this this
product product
integrates integrate
both both
libraries library
for for
downloading download
and and
applying apply
patches patch


In [5]:
# Applying lemmatization for meaning recognition: a tokenizer special case
# replaces the lemma reported for one exact token text.
import spacy
from spacy.symbols import ORTH, LEMMA
nlp = spacy.load('en')
sentence = u'I am flying to Minsk'
doc = nlp(sentence)
# Token texts of the sentence, printed before the special case is registered
token_texts = [tok.text for tok in doc]
print(token_texts)
# Register the rule: the token text 'Minsk' is emitted with the lemma 'Moskau'
special_case = [{ORTH: u'Minsk', LEMMA: u'Moskau'}]
nlp.tokenizer.add_special_case(u'Minsk', special_case)
# Run the pipeline again; `doc` above was built before the rule existed
lemmas = [tok.lemma_ for tok in nlp(sentence)]
print(lemmas)

['I', 'am', 'flying', 'to', 'Minsk']
['-PRON-', 'be', 'fly', 'to', 'Moskau']


In [9]:
# Using part-of-speech tags to find relevant verbs
import spacy
nlp = spacy.load('en')
doc = nlp(u'I have flown to LA. Now I am flying to Frisco.')
# Fine-grained tags (tag_): keep only tokens tagged VBG or VB
verb_tags = ('VBG', 'VB')
print([tok.text for tok in doc if tok.tag_ in verb_tags])
# Coarse-grained tags (pos_): keep only tokens tagged PROPN
print([tok.text for tok in doc if tok.pos_ == 'PROPN'])

['flying']
['LA', 'Frisco']


In [24]:
# Syntactic relations: text, coarse part-of-speech tag and dependency label
# of every token, one token per line
import spacy
nlp = spacy.load('en')
doc = nlp(u'I have flown to LA. Now I am flying to Frisco.')
for tok in doc:
    print(f'{tok.text} {tok.pos_} {tok.dep_}')

I PRON nsubj
have AUX aux
flown VERB ROOT
to ADP prep
LA PROPN pobj
. PUNCT punct
Now ADV advmod
I PRON nsubj
am AUX aux
flying VERB ROOT
to ADP prep
Frisco PROPN pobj
. PUNCT punct


In [25]:
# Each printed line reads "<head text> <dependency label> <token text>", which
# shows how the words in the discourse sentences are connected to each other
# by syntactic dependencies.
# `doc` is not created in this cell; it must already exist in the kernel.
for tok in doc:
    print(f'{tok.head.text} {tok.dep_} {tok.text}')

flown nsubj I
flown aux have
flown ROOT flown
flown prep to
to pobj LA
flown punct .
flying advmod Now
flying nsubj I
flying aux am
flying ROOT flying
flying prep to
to pobj Frisco
flying punct .


In [14]:
# For each sentence, list the words whose dependency label is ROOT or pobj
import spacy
nlp = spacy.load('en')
doc = nlp(u'I have flown to LA. Now I am flying to Frisco.')
wanted_deps = ('ROOT', 'pobj')
for sentence in doc.sents:
    print([tok.text for tok in sentence if tok.dep_ in wanted_deps])

['flown', 'LA']
['flying', 'Frisco']


In [23]:
# Lemmas that carry the meaning of the request: the token labelled ROOT, the
# token labelled pobj, and any compound modifier attached to either of them.
# The head test sits in its own group on purpose: `and` binds tighter than
# `or`, so the ungrouped form "compound and head-is-ROOT or head-is-pobj"
# would also accept every child of a pobj regardless of its own label
# (for example a determiner or an adjective).
# `nlp` is not created in this cell; it must already exist in the kernel.
print([
    w.lemma_
    for w in nlp(u'I am flying to San Francisco')
    if w.dep_ in ('ROOT', 'pobj')
    or (w.dep_ == 'compound' and w.head.dep_ in ('ROOT', 'pobj'))
])

['fly', 'San', 'Francisco']


In [27]:
# Named entity recognition: print every token that carries an entity type,
# together with that type's label
import spacy
nlp = spacy.load('en')
doc = nlp(u'I have flown to LA. Now I am flying to Frisco.')
for tok in doc:
    # A zero ent_type is treated as "no entity": skip those tokens
    if tok.ent_type == 0:
        continue
    print(tok.text, tok.ent_type_)

LA GPE
Frisco ORG
