In [3]:
# import spacy
import spacy

In [4]:
nlp = spacy.load("en_core_web_sm")

In [7]:
sentence = '"Mr. O\'Neill thinks that the boys\' stories about Chile\'s capital aren\'t amusing."'

In [8]:
print(sentence )

"Mr. O'Neill thinks that the boys' stories about Chile's capital aren't amusing."


In [9]:
doc_object = nlp(sentence)

In [12]:
for token in doc_object:
    print(token)

"
Mr.
O'Neill
thinks
that
the
boys
'
stories
about
Chile
's
capital
are
n't
amusing
.
"


In [15]:
for token in doc_object:
    print(token.text, "(",token.pos_, ")", end=" | ")

" ( PUNCT ) | Mr. ( PROPN ) | O'Neill ( PROPN ) | thinks ( VERB ) | that ( ADP ) | the ( DET ) | boys ( NOUN ) | ' ( PART ) | stories ( NOUN ) | about ( ADP ) | Chile ( PROPN ) | 's ( PART ) | capital ( NOUN ) | are ( VERB ) | n't ( ADV ) | amusing ( ADJ ) | . ( PUNCT ) | " ( PUNCT ) | 

In [19]:
sentence = "It is bes to access our website from 9 a.m. to 1 p.m. every weeknd.\
The address is www.mywebsite.ie."

In [20]:
doc_object = nlp(sentence)

In [21]:
for token in doc_object:
    print(token.text, "(",token.pos_, ")", end=" | ")

It ( PRON ) | is ( VERB ) | bes ( ADJ ) | to ( PART ) | access ( VERB ) | our ( ADJ ) | website ( NOUN ) | from ( ADP ) | 9 ( NUM ) | a.m. ( NOUN ) | to ( ADP ) | 1 ( NUM ) | p.m. ( ADV ) | every ( DET ) | weeknd ( NOUN ) | . ( PUNCT ) | The ( DET ) | address ( NOUN ) | is ( VERB ) | www.mywebsite.ie ( NOUN ) | . ( PUNCT ) | 

In [22]:
sentence = "I will about 20km from here. Taxi will cost around £50 or $60"
doc_object = nlp(sentence)
for token in doc_object:
    print(token.text, "(",token.pos_, ")", end=" | ")

I ( PRON ) | will ( VERB ) | about ( ADV ) | 20 ( NUM ) | km ( NOUN ) | from ( ADP ) | here ( ADV ) | . ( PUNCT ) | Taxi ( PROPN ) | will ( VERB ) | cost ( VERB ) | around ( ADP ) | £ ( SYM ) | 50 ( NUM ) | or ( CCONJ ) | $ ( SYM ) | 60 ( NUM ) | 

In [23]:
len(doc_object)

17

In [24]:
len(doc_object.vocab)

57853

In [28]:
doc_object = nlp ("I really like working with words!")
for token in doc_object:
    print(token.text, "(",token.pos_, ")", end=" | ")

I ( PRON ) | really ( ADV ) | like ( VERB ) | working ( VERB ) | with ( ADP ) | words ( NOUN ) | ! ( PUNCT ) | 

In [39]:
doc_object[-2:]

words!

In [41]:
# NER

doc_object = nlp ("Samsung in Ireland are pleased with their \
new folding scren that they release after a $9 million investment")
for token in doc_object:
    print(token.text, "(",token.pos_, ")", end=" | ")

Samsung ( PROPN ) | in ( ADP ) | Ireland ( PROPN ) | are ( VERB ) | pleased ( ADJ ) | with ( ADP ) | their ( ADJ ) | new ( ADJ ) | folding ( NOUN ) | scren ( NOUN ) | that ( ADP ) | they ( PRON ) | release ( VERB ) | after ( ADP ) | a ( DET ) | $ ( SYM ) | 9 ( NUM ) | million ( NUM ) | investment ( NOUN ) | 

In [44]:
for entity in doc_object.ents:
    print(entity, "(", entity.label_, ")", spacy.explain(entity.label_) , end=" \n ")

Samsung ( ORG ) Companies, agencies, institutions, etc. 
 Ireland ( GPE ) Countries, cities, states 
 $9 million ( MONEY ) Monetary values, including unit 
 

In [87]:
def show_entities(sentence):
    doc_object = nlp(sentence)
    if (len(doc_object.ents) > 0):
        for entity in doc_object.ents:
            print("Entity : ", entity, 5*' ' ,\
                  ", Entity Label : ", entity.label_, 5*' ', \
                  ", Explanation: ", spacy.explain(entity.label_) , 5*' ', \
                  end=" \n ")
    else:
        print("No NER present")

In [88]:
show_entities("I like my")

No NER present


In [89]:
show_entities("I like my LYIT")

Entity :  LYIT       , Entity Label :  ORG       , Explanation:  Companies, agencies, institutions, etc.       
 

In [97]:
def show_noun_chunks(sentence):
    doc_object = nlp(sentence)
    for chunk in doc_object.noun_chunks:
        print("Chunk : ", chunk, 5*' ' ,\
        ", Entity Label : ", chunk.root.text, 5*' ', \
        ", Explanation: ", spacy.explain(chunk.root.dep_) , 5*' ', \
        ", Related: ", spacy.explain(chunk.root.head.text) , 5*' ', \
        end=" \n ")
   

In [98]:
show_noun_chunks("I like my")

Chunk :  I       , Entity Label :  I       , Explanation:  nominal subject       , Related:  None       
 

In [99]:
show_noun_chunks("I like my LYIT")

Chunk :  I       , Entity Label :  I       , Explanation:  nominal subject       , Related:  None       
 Chunk :  my LYIT       , Entity Label :  LYIT       , Explanation:  direct object       , Related:  None       
 

In [116]:
from spacy import displacy

doc_object = nlp('"Autonomouns cars shift insurance liability towards manufacturer"')

In [121]:
displacy.render(doc_object,style = "dep", jupyter = True, options= {"distance" :90, 
                                                                   "color" : "Blue",
                                                                   "arrow_stroke" : 1,
                                                                   "arrow_spacing": 20,
                                                                   "word_spacing": 50,
                                                                   "compact" : True})


In [123]:
my_text_file = open("noun_chunks.txt")
sentence = my_text_file.read()


In [124]:
show_entities(sentence)

Entity :  Sebastian Thrun       , Entity Label :  PERSON       , Explanation:  People, including fictional       
 Entity :  Google       , Entity Label :  ORG       , Explanation:  Companies, agencies, institutions, etc.       
 Entity :  2007       , Entity Label :  DATE       , Explanation:  Absolute or relative dates or periods       
 Entity :  
       , Entity Label :  GPE       , Explanation:  Countries, cities, states       
 Entity :  American       , Entity Label :  NORP       , Explanation:  Nationalities or religious or political groups       
 Entity :  Thrun       , Entity Label :  PERSON       , Explanation:  People, including fictional       
 Entity :  Recode       , Entity Label :  ORG       , Explanation:  Companies, agencies, institutions, etc.       
 Entity :  earlier this week       , Entity Label :  DATE       , Explanation:  Absolute or relative dates or periods       
 Entity :  
       , Entity Label :  GPE       , Explanation:  Countries, cities, states     

In [132]:
doc_object = nlp (sentence)

displacy.render(doc_object,style = "dep", jupyter = True, options= {"distance" :90, 
                                                                   "color" : "Blue",
                                                                   "arrow_stroke" : 1,
                                                                   "arrow_spacing": 1,
                                                                   "word_spacing": 5,
                                                                   "compact" : True})

In [131]:

displacy.render(doc_object,style = "ent", jupyter = True)