In [58]:

import spacy

def display(to_display):
    """Print ``to_display`` with one blank line before it and one after it.

    Note: inside a Jupyter/IPython session this definition shadows the
    built-in rich ``display`` helper for the rest of the notebook.
    """
    # A single print call; the empty strings produce the surrounding blank lines.
    print('', to_display, '', sep='\n')

def display_sentence_words(sentence, display_sentence = True):
    """Print one row per token: its text, part-of-speech tag and dependency label.

    Args:
        sentence: Iterable of tokens exposing ``text``, ``pos_`` and ``dep_``.
        display_sentence: When truthy, the sentence itself is shown first
            via ``display()``.
    """
    if display_sentence:
        display(sentence)
    print()
    for token in sentence:
        columns = (token.text, token.pos_, token.dep_)
        print(' '.join(map(str, columns)))
    print()

def display_entities(sentence):
    """Print every entity in ``sentence.ents`` as ``text - label - explanation``.

    The explanation is whatever ``spacy.explain`` returns for the label,
    converted with ``str()`` before it is joined to the other fields.
    """
    print()
    for ent in sentence.ents:
        fields = (ent.text, ent.label_, str(spacy.explain(ent.label_)))
        print(' - '.join(fields))
    print()

def display_nouns(sentence):
    """Print the text of each chunk in ``sentence.noun_chunks``, one per line.

    The list is framed by a blank line above and below.
    """
    print()
    # Lazy generator: each chunk is printed as soon as it is produced.
    for chunk_text in (chunk.text for chunk in sentence.noun_chunks):
        print(chunk_text)
    print()

def display_stemming(tokens, stemmer):
    """Print ``token --> stem`` for every string in ``tokens``.

    Args:
        tokens: Iterable of strings to stem.
        stemmer: Object whose ``stem(token)`` method returns the stemmed string.
    """
    print()
    for original in tokens:
        print(' --> '.join((original, stemmer.stem(original))))
    print()

def display_lemmatization(sentence):
    """Print ``text  ===> lemma`` for every token in ``sentence``.

    Each token must expose ``text`` and ``lemma_``; the list is framed by a
    blank line above and below.
    """
    print()
    for token in sentence:
        # Two spaces before the arrow and one after, as a single joined string.
        print('  ===> '.join((token.text, str(token.lemma_))))
    print()
    

In [3]:

# Load the `en_core_web_sm` English pipeline once; later cells parse text through `sp`.
sp = spacy.load("en_core_web_sm")



## Basic Functionality


In [34]:

# Parse a short sentence, then show it followed by each token's POS tag and dependency label.
sentence = sp('Manchester United is looking to sign a forward for $90 million')
display_sentence_words(sentence, display_sentence=True)



Manchester United is looking to sign a forward for $90 million


Manchester PROPN compound
United PROPN nsubj
is AUX aux
looking VERB ROOT
to PART aux
sign VERB xcomp
a DET det
forward NOUN dobj
for ADP prep
$ SYM quantmod
90 NUM compound
million NUM pobj



In [35]:

# Negated variant of the previous sentence; only the token rows are printed.
sentence_2 = sp("Manchester United isn't looking to sign any forward.")
display_sentence_words(sentence_2, display_sentence=False)
    


Manchester PROPN compound
United PROPN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
to PART aux
sign VERB xcomp
any DET det
forward NOUN advmod
. PUNCT punct



In [62]:

# A three-sentence document, to look at sentence boundaries.
document = sp('Hello from Stackabuse. The site with the best Python Tutorials. What are you looking for?')
display_sentence_words(document, display_sentence=True)

# Show the token at index 4 and whether it is flagged as the start of a sentence,
# followed by a trailing blank line.
print(document[4], document[4].is_sent_start, '', sep='\n')



Hello from Stackabuse. The site with the best Python Tutorials. What are you looking for?


Hello INTJ ROOT
from ADP prep
Stackabuse PROPN pobj
. PUNCT punct
The DET det
site NOUN ROOT
with ADP prep
the DET det
best ADJ amod
Python PROPN compound
Tutorials PROPN pobj
. PUNCT punct
What PRON pobj
are AUX aux
you PRON nsubj
looking VERB ROOT
for ADP prep
? PUNCT punct

The
True




## Tokenization


In [37]:

# Quotes, a contraction and dotted abbreviations: see how the text is split into tokens.
sentence_3 = sp("\"They're leaving U.K. for U.S.A.\"")
display_sentence_words(sentence_3, display_sentence=True)



"They're leaving U.K. for U.S.A."


" PUNCT punct
They PRON nsubj
're AUX aux
leaving VERB ROOT
U.K. PROPN dobj
for ADP prep
U.S.A. PROPN pobj
" PUNCT punct



In [27]:

sentence_4 = sp(u"Hello, I am non-vegetarian, email me the menu at abc-xyz@gmai.com")

# display_sentence_words() already prints the sentence ahead of the per-token
# rows (its display_sentence argument defaults to True), so a separate
# display(sentence_4) call here would print the sentence twice.
display_sentence_words(sentence_4)



Hello, I am non-vegetarian, email me the menu at abc-xyz@gmai.com


Hello INTJ intj
, PUNCT punct
I PRON nsubj
am AUX ROOT
non ADJ acomp
- ADJ attr
vegetarian ADJ attr
, PUNCT punct
email VERB dep
me PRON dative
the DET det
menu NOUN dobj
at ADP prep
abc-xyz@gmai.com NOUN pobj



In [38]:

# Number of tokens in sentence_4 (len() of the parsed document), printed
# between blank lines by the notebook's display() helper.
display(len(sentence_4))



14




## Detecting Entities


In [39]:

# Same sentence as before, but with a named player, for the entity demo.
sentence_5 = sp('Manchester United is looking to sign Harry Kane for $90 million')
display_sentence_words(sentence_5, display_sentence=True)



Manchester United is looking to sign Harry Kane for $90 million


Manchester PROPN compound
United PROPN nsubj
is AUX aux
looking VERB ROOT
to PART aux
sign VERB xcomp
Harry PROPN compound
Kane PROPN dobj
for ADP prep
$ SYM quantmod
90 NUM compound
million NUM pobj



In [41]:

# List each entity found in sentence_5 as "text - label - explanation of the label".
display_entities(sentence_5)



Manchester United - GPE - Countries, cities, states
Harry Kane - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit




## Detecting Nouns


In [44]:

# Print the noun chunks of a sentence that starts with a headline-style prefix.
sentence_6 = sp('Latest Rumours: Manchester United is looking to sign Harry Kane for $90 million')
display_nouns(sentence_6)



Latest Rumours
Manchester United
Harry Kane




## Stemming


In [49]:

import nltk

from nltk.stem.porter import *
from nltk.stem.snowball import SnowballStemmer


In [52]:

# Four inflections of the same word, run through each stemmer in turn.
tokens = ['compute', 'computer', 'computed', 'computing']

# --- Porter stemmer ---
print('\n> Porter Stemmer')
display_stemming(tokens, PorterStemmer())

# --- Snowball stemmer (English) ---
print('> Snowball Stemmer')
display_stemming(tokens, SnowballStemmer(language='english'))



> Porter Stemmer

compute --> comput
computer --> comput
computed --> comput
computing --> comput

> Snowball Stemmer

compute --> comput
computer --> comput
computed --> comput
computing --> comput




## Lemmatization


In [59]:

# Lemmatize the same four word forms that were stemmed earlier.
sentence_7 = sp('compute computer computed computing')
display_lemmatization(sentence_7)



compute  ===> compute
computer  ===> computer
computed  ===> compute
computing  ===> computing



In [60]:

# Lemmatize a full sentence containing several inflected verb forms.
sentence_8 = sp('A letter has been written, asking him to be released')
display_lemmatization(sentence_8)



A  ===> a
letter  ===> letter
has  ===> have
been  ===> be
written  ===> write
,  ===> ,
asking  ===> ask
him  ===> he
to  ===> to
be  ===> be
released  ===> release

