In [3]:
# Import spaCy and load the language library
import spacy
from spacy import displacy
from pathlib import Path

# Load the small English pipeline once; every example below reuses `nlp`.
nlp = spacy.load('en_core_web_sm')

# Create a string with quotes
mystring = '"We\'re moving to L.A.!"'
print(mystring)

# Create a Doc object and explore tokens
doc = nlp(mystring)
for token in doc:
    # Keep all tokens on one line, separated by ' | '.
    print(token.text, end=' | ')
# The loop above never emits a newline; terminate the line here so the
# output of whatever runs next does not get appended to the token list.
print()

# More tokenization samples. The Docs are built first and printed
# afterwards, one token per line, in the order they were created.

# Another example
doc2 = nlp("We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!")

# Numbers and currency
doc3 = nlp('A 5km NYC cab ride costs $10.30')

# Exceptions
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")

for sample in (doc2, doc3, doc4):
    for t in sample:
        print(t)

# Counting tokens and vocab: first line is the number of tokens in `doc`,
# second line is the size of the vocab that `doc` refers to.
print(len(doc), len(doc.vocab), sep='\n')

# Token indexing: a Doc accepts a single index as well as slices.
doc5 = nlp('It is better to give than to receive.')
print(
    doc5[2],      # third token
    doc5[2:5],    # slice
    doc5[-4:],    # last 4 tokens
    sep='\n',
)

# Named Entities
doc8 = nlp('Apple to build a Hong Kong factory for $6 million')

# Show the raw tokens on a single ' | '-separated line ...
for token in doc8:
    print(token.text, end=' | ')

# ... then close that line and draw a divider before the entity listing.
print('\n----')

# One line per entity: its text, its label, and spaCy's explanation of
# that label.
for ent in doc8.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep=' - ')

print(f"Number of entities: {len(doc8.ents)}")

# Noun Chunks: parse the three sample sentences, then list the text of
# every noun chunk, one per line, sentence by sentence.
doc9 = nlp("Autonomous cars shift insurance liability toward manufacturers.")
doc10 = nlp("Red cars do not carry higher insurance rates.")
doc11 = nlp("He was a one-eyed, one-horned, flying, purple people-eater.")

for parsed in (doc9, doc10, doc11):
    for chunk in parsed.noun_chunks:
        print(chunk.text)

# Dependency Visualization in Jupyter: draw the dependency parse inline
# in the notebook, passing a 'distance' option of 110 to the renderer.
doc = nlp('Apple is going to build a U.K. factory for $6 million.')
render_options = {'distance': 110}
displacy.render(doc, style='dep', jupyter=True, options=render_options)

# Dependency Visualization in Spyder (save to file)
doc1 = nlp("This is a sentence.")
doc2 = nlp("Ramayana was written thousands of years ago by Rishi Valmiki")

# Render both parses as one standalone HTML page and keep the markup.
# jupyter=False is required here: when left unset, displaCy auto-detects a
# notebook kernel, displays the result inline and returns None, so `html`
# would not hold any markup to save.
html = displacy.render([doc1, doc2], style="dep", page=True, jupyter=False)




"We're moving to L.A.!"
" | We | 're | moving | to | L.A. | ! | " | We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com
!
A
5
km
NYC
cab
ride
costs
$
10.30
Let
's
visit
St.
Louis
in
the
U.S.
next
year
.
8
794
better
better to give
than to receive.
Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 
----
Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit
Number of entities: 3
Autonomous cars
insurance liability
manufacturers
Red cars
higher insurance rates
He
a one-eyed, one-horned, flying, purple people-eater


FileNotFoundError: [Errno 2] No such file or directory: 'E:\\muthu 2\\NLPSPYDER\\01-NLP-Python-Basics\\sentence1.svg'