In [17]:
# Bring in spaCy, then load the 'en_core_web_sm' pipeline into `nlp`
import spacy

nlp = spacy.load("en_core_web_sm")

In [18]:
# Build a sentence wrapped in literal double quotes (with an apostrophe inside)
# and echo it back.
mystring = "\"We're moving to L.A!\""
print(mystring)

"We're moving to L.A!"


In [19]:
# Run the pipeline on the string and show every token, each followed by '|'
doc = nlp(mystring)

print("".join(f"{token.text}|" for token in doc), end="")

"|We|'re|moving|to|L.A|!|"|

In [20]:
# Sample text containing a contraction, a hyphenated word, an email address
# and a web address.
# NOTE(review): the web address is written with a single backslash
# ("http:\www...") rather than "//" -- confirm whether that is intentional.
doc2 = nlp(
    "We're here to help! Send snail-mail, email support@oursite.com "
    "or visit us at http:\\www.oursite.com!"
)

In [21]:
# One token per line
print(*doc2, sep="\n")

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http:\www.oursite.com
!


In [22]:
# Tokenize a sentence that contains a number fused to its unit and a dollar amount
doc3 = nlp("A 5km NYC cab ride costs $10.30")

# One token per line
print(*doc3, sep="\n")

A
5
km
NYC
cab
ride
costs
$
10.30


In [23]:
# Tokenize a sentence containing abbreviations that end in a period
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")
print("\n".join(str(word) for word in doc4))

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


# Counting Tokens

In [24]:
# Number of tokens in doc
len(doc)

8

In [25]:
# Size of the vocabulary object attached to doc
len(doc.vocab)

794

In [26]:
# Tokens can be retrieved by index position and slice.

In [27]:
# Short sentence used below to demonstrate indexing and slicing.
# NOTE(review): the wording looks like a typo for "to give than to receive";
# kept as-is because the recorded slice outputs below depend on it.
doc5 = nlp("It is better to given then to receive")

In [28]:
# Retrieve three tokens from the middle (positions 2, 3 and 4)
doc5[2:5]

better to given

In [29]:
# Retrieve the last four tokens using a negative slice start
doc5[-4:]

given then to receive

# Tokens cannot be reassigned

In [30]:
# Two sentences that differ only in their first word and their adjective
doc6, doc7 = map(nlp, ("My dinner was horrible.", "Your dinner was delicious."))

In [31]:
# Try to change 'My dinner was horrible' to 'My dinner was delicious'.
# A Doc rejects item assignment with a TypeError. The error is caught and
# printed so that this demonstration does not abort a top-to-bottom run of
# the notebook; the printed text is the same "TypeError: ..." message.
try:
    doc6[3] = doc7[3]
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

# Named Entities

In [33]:
# Tokenize a sentence, then list each named entity with its label and
# spaCy's explanation of that label.
doc8 = nlp(u'Apple to build a Hong Kong factory for $6 million')
for token in doc8:
    print(token.text, end='|')
# '\-' is not a valid escape sequence, so the backslash is doubled. The same
# text (a backslash followed by four dashes) is printed, without Python's
# invalid-escape-sequence warning. It lands on the token line because the
# loop above suppresses newlines.
print('\\----')
for ent in doc8.ents:
    # str() guards the concatenation in case explain() does not return a string
    print(ent.text + ' - ' + ent.label_ + ' - ' + str(spacy.explain(ent.label_)))

Apple|to|build|a|Hong|Kong|factory|for|$|6|million|\----
Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [34]:
# Number of named entities found in doc8
len(doc8.ents)

3

# Noun Chunks

In [35]:
# List the noun chunks of the sentence, one per line.
# NOTE(review): "manufactures" may be a typo for "manufacturers"; kept as-is
# because the recorded output below depends on it.
doc9 = nlp("Autonomous cars shift insurance liability toward manufactures.")
print("".join(f"{chunk.text}\n" for chunk in doc9.noun_chunks), end="")

Autonomous cars
insurance liability
manufactures


In [36]:
# List the noun chunks of a second sentence, one per line
doc10 = nlp("Red cars do not carry higher insurance rates.")
print("".join(f"{chunk.text}\n" for chunk in doc10.noun_chunks), end="")

Red cars
higher insurance rates


In [37]:
# List the noun chunks of a sentence with a long run of modifiers, one per line
doc11 = nlp("He was a one-eyed, one-horned, flying, purple people-eater.")
print("".join(f"{chunk.text}\n" for chunk in doc11.noun_chunks), end="")

He
a one-eyed, one-horned, flying, purple people-eater


# Visualizing the dependency parse

In [38]:
from spacy import displacy
# Parse a sentence and render its dependency view inline in the notebook.
# Note that this rebinds `doc`, replacing the Doc created earlier.
doc = nlp("Apple is going to build a U.K. factory for $6 million.")
dep_options = {"distance": 110}  # passed through to the renderer as `options`
displacy.render(doc, style="dep", jupyter=True, options=dep_options)

In [39]:
# Visualizing the named entities

In [40]:
# Process a sentence and render its named entities inline in the notebook.
# "quarter" is spelled correctly here so the model is given ordinary English
# text rather than a misspelled word.
# Note that this rebinds `doc`.
doc = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(doc, style='ent', jupyter=True)